Distributed nix build: split large package into many derivations

aka: nix-distcc, nix-bazel

moved from Incremental builds - #11 by milahu
related splitBuildInstall: split buildPhase and installPhase for large packages
related Caching local compilation results for all compiled packages - #5 by jonringer

concept

1. run cmake and make in dry-run mode
2. get compile commands from compile_commands.json
3. get link commands from CMakeFiles/sometargetname.dir/link.txt
4. generate one derivation per compilation object → compile-derivations
5. generate one derivation per link target → link-derivations
6. pass compiled objects as buildInputs to the link-derivations
7. pass linked executables/libraries as buildInputs to the package-derivation

sample project

ninvaders

  • is using cmake build tool
  • has multiple source files: 7 *.c files
  • has only 1 link target - could be more, but meh

compile commands

use compiledb -n make to generate a compile_commands.json file

bear is another popular tool for this job, but bear has no dry run mode
the result of compiledb is similar to make --dry-run | grep gcc

link commands

to produce the ninvaders binary,
we need the link command in CMakeFiles/ninvaders.dir/link.txt

gcc CMakeFiles/ninvaders.dir/aliens.c.o CMakeFiles/ninvaders.dir/globals.c.o … -o ninvaders /nix/store/jzjqfff4cldlm2wpld313a2s1v6r3ycb-ncurses-6.2/lib/libncurses.so …

build tree

to parse the build tree / dependency tree, we can query the cmake file API

docs: cmake file api

mkdir -p .cmake/api/v1/query
touch .cmake/api/v1/query/codemodel-v2
cmake .

ls .cmake/api/v1/reply
codemodel-v2-68e4e0ffd43539f5beb8.json
directory-.-6563d967dbe05c00b1a9.json
index-2021-11-07T12-58-37-0461.json
target-ninvaders-49e8513ef38dff190c55.json

working prototype

using compile commands from compile_commands.json
and link command from CMakeFiles/ninvaders.dir/link.txt

I use symlinks where possible, to reduce the overhead of copying files

{ lib, stdenv, fetchFromGitHub, cmake, ncurses, python3, python3Packages, gcc }:

let
# Stage 1: run only the configure step and publish the whole /build tree
# (unpacked sources, generated Makefiles, compile_commands.json and the
# cmake file-API reply) as the output of this derivation.
configurePhaseDrv =
  let
    pname = "ninvaders";
    version = "0.1.2";
  in
  stdenv.mkDerivation {
    # pname/version stay regular attributes, so later stages can `inherit` them.
    inherit pname version;
    name = "${pname}-${version}-configurePhase";

    src = fetchFromGitHub {
      owner = "sf-refugees";
      repo = pname;
      rev = "v${version}";
      sha256 = "1wmwws1zsap4bfc2439p25vnja0hnsf57k293rdxw626gly06whi";
    };

    # compiledb turns the dry-run output of make into compile_commands.json.
    nativeBuildInputs = [ cmake python3 python3Packages.compiledb ];
    buildInputs = [ ncurses ];

    # Everything after configurePhase is switched off.
    dontBuild = true;
    dontCheck = true;
    dontInstall = true;
    dontDist = true;

    # Custom configurePhase: out-of-tree cmake run in /build/build with a
    # file-API query in place, then a dry-run of make through compiledb.
    # The whole /build directory is copied to $out, and the name of the
    # source directory is stored in $out/sourceRoot.txt.
    configurePhase = ''
      cd /build; mkdir build; cd build

      mkdir -p .cmake/api/v1/query
      touch .cmake/api/v1/query/codemodel-v2

      # CMakeLists.txt -> Makefile
      cmake ../$sourceRoot

      mv .cmake/api/v1/reply/index-*.json .cmake/api/v1/reply/index.json
      # probably the files in reply/ are not reproducible, but we could patch them

      # Makefile -> compile_commands.json
      compiledb -n make

      cp -r /build $out
      printf "%s" "$sourceRoot" >$out/sourceRoot.txt
    '';
  };
in

let
# Stage 2: evaluate the result of configurePhaseDrv and generate one
# derivation per entry of compile_commands.json.
# Note: every builtins.readFile below reads from the output of
# configurePhaseDrv, so that derivation must be built while this
# expression is evaluated.
drv2 = rec {
  # Name of the unpacked source directory, as recorded by configurePhaseDrv.
  sourceRoot = builtins.readFile "${configurePhaseDrv}/sourceRoot.txt";
  sourcePath = /* lib.traceValSeq */ "${configurePhaseDrv}/${sourceRoot}";

  # The cmake build directory inside the configure output.
  configureResultDir = "${configurePhaseDrv}/build";
  compileCommands = /* lib.traceValSeq */ (builtins.fromJSON (builtins.readFile "${configureResultDir}/compile_commands.json"));

  # One derivation per compile command, in the order of compile_commands.json.
  compileObjects = lib.imap0 compileObjectOfCommand compileCommands;

  # Build the derivation that runs a single compile command.
  #   commandIdx: index of the command in compile_commands.json (currently
  #               only used by the commented-out name below)
  #   command:    one entry of compile_commands.json; the attributes
  #               `file`, `directory` and `arguments` are read
  # The object file (and everything else the command writes below
  # CMakeFiles/<target>.dir/) ends up in $out/<target>.dir/.
  compileObjectOfCommand = commandIdx: command: (stdenv.mkDerivation {
    # avoid using configurePhaseDrv pname and version?
    # re-use the compile-object across different versions (and pnames)
    #name = "${configurePhaseDrv.pname}-${configurePhaseDrv.version}-obj${builtins.toString commandIdx}";
    name = "compileobject-${builtins.baseNameOf command.file}";

    # FIXME use only the needed inputs. avoid recompile when buildInputs change
    inherit (configurePhaseDrv) buildInputs;

    nativeBuildInputs = [ gcc ]; # note: no cmake
    src = configurePhaseDrv.out;
    buildCommand = ''
      # recreate the directory layout of the configure step with symlinks.
      # the source directory name comes from sourceRoot.txt instead of
      # being hard-coded to "source"
      ln -s $src/${sourceRoot} /build/${sourceRoot}
      mkdir /build/build
      ln -s $src/build/CMakeFiles /build/build/

      cd /build/build
      argsRaw=(${lib.escapeShellArgs command.arguments})

      # debug
      if false; then
      echo "command.directory = ${command.directory}"
      echo "command.file = ${command.file}"
      echo "command.arguments = ''${argsRaw[@]}"
      fi

      # redirect every argument below CMakeFiles/<target>.dir/ to $out/<target>.dir/
      # the guard requires the trailing slash, like the sed expression that
      # extracts the target directory
      args=()
      for a in "''${argsRaw[@]}"; do
        if (echo "$a" | grep -E '^CMakeFiles/([^/]+\.dir)/' >/dev/null); then
          args+=("$(echo "$a" | sed -E "s,^CMakeFiles/([^/]+\.dir),$out/\1,")")
          outDir="$(echo "$a" | sed -E "s,^CMakeFiles/([^/]+\.dir)/.*$,\1,")"
          outPath="$out/$outDir"
          if [ ! -d "$outPath" ]; then mkdir -p "$outPath"; fi
        else
          args+=("$a")
        fi
      done
      echo "''${args[@]}"
      "''${args[@]}"
    '';
  });

  # cmake file-API reply; index.json is the renamed index-*.json file.
  cmakeReplyDir = "${configureResultDir}/.cmake/api/v1/reply";
  cmakeIndex = builtins.fromJSON (builtins.readFile "${cmakeReplyDir}/index.json");
  cmakeCodemodel = builtins.fromJSON (builtins.readFile "${cmakeReplyDir}/${cmakeIndex.reply.codemodel-v2.jsonFile}");
  # TODO multiple configurations?
  cmakeConfiguration = (builtins.elemAt cmakeCodemodel.configurations 0);
  #cmakeConfiguration.directories[0].jsonFile
  #cmakeConfiguration.projects[0].name == "ninvaders"
  # Parsed target-*.json files of the first configuration.
  # NOTE(review): not referenced by the link derivation visible in this
  # file, which globs CMakeFiles/*.dir instead.
  targets = builtins.map (target: builtins.fromJSON (builtins.readFile "${cmakeReplyDir}/${target.jsonFile}")) cmakeConfiguration.targets;
};
in

# Stage 3: collect all compile objects and run the link command of every
# target directory CMakeFiles/<target>.dir, then install the results
# to $out/bin.
stdenv.mkDerivation {
  inherit (configurePhaseDrv) pname version;
  src = configurePhaseDrv.out;

  buildCommand = ''
    cp -r "$src"/* /build; chmod -R +w /build

    # symlink the object files of all compile derivations into the build tree
    objList=()
    ${lib.concatMapStringsSep "\n" (obj: ''objList+=("${obj}")'') drv2.compileObjects}
    for o in "''${objList[@]}"; do
      echo "obj $o"
      cp -rs "$o"/* /build/build/CMakeFiles
    done

    mkdir -p "$out/bin"
    cd /build/build
    for targetDir in CMakeFiles/*.dir;
    do
      targetName=''${targetDir%.dir}
      targetName=''${targetName##*/}

      linkCommand="$(cat "$targetDir/link.txt")"
      echo "linking $targetName"
      echo "$linkCommand"
      # link.txt contains shell command lines. evaluate them as shell code,
      # so that quoted arguments and multiple lines keep their meaning.
      # plain word splitting would pass quote characters literally
      # and would join all lines into a single command
      eval "$linkCommand"

      cp -v "$targetName" "$out/bin"
    done
  '';
}

this will build 8 derivations = 7 compile objects + 1 link target
the 8 derivations can be distributed across multiple build machines, aka distcc

build output

the build is distributed between localhost and laptop3

nix-build . -A ninvaders

building '/nix/store/dc5ikqii3rm8hpg683458z8gng5r9qpa-ninvaders-0.1.2-configurePhase.drv' on 'ssh://laptop3'...
copying 6 paths...
copying path '/nix/store/19s295848nb8z5fsx2y5vynk5jifw4yg-source' to 'ssh://laptop3'...
copying path '/nix/store/3gbknbks7f4p4fca41hrffra2xp9fnqw-python3.9-enum-compat-0.0.3' to 'ssh://laptop3'...
copying path '/nix/store/flpj82wfqpzzbvhpbakk1gksw6hziw1q-python3.9-click-8.0.1' to 'ssh://laptop3'...
copying path '/nix/store/kij2088a9jrk00v44bg59132rkyxjc09-python3.9-shutilwhich-1.1.0' to 'ssh://laptop3'...
copying path '/nix/store/rj8nvqadg13yhhy6w991nvx0m0s5cas0-python3.9-bashlex-0.15' to 'ssh://laptop3'...
copying path '/nix/store/2rvhljxwjf951sr7qb9p8rpyajk59di6-python3.9-compiledb-0.10.1' to 'ssh://laptop3'...
unpacking sources
unpacking source archive /nix/store/19s295848nb8z5fsx2y5vynk5jifw4yg-source
source root is source
patching sources
configuring
-- The C compiler identification is GNU 10.3.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Looking for cbreak in /nix/store/jzjqfff4cldlm2wpld313a2s1v6r3ycb-ncurses-6.2/lib/libncurses.so
-- Looking for cbreak in /nix/store/jzjqfff4cldlm2wpld313a2s1v6r3ycb-ncurses-6.2/lib/libncurses.so - found
-- Looking for nodelay in /nix/store/jzjqfff4cldlm2wpld313a2s1v6r3ycb-ncurses-6.2/lib/libncurses.so
-- Looking for nodelay in /nix/store/jzjqfff4cldlm2wpld313a2s1v6r3ycb-ncurses-6.2/lib/libncurses.so - found
-- Found Curses: /nix/store/jzjqfff4cldlm2wpld313a2s1v6r3ycb-ncurses-6.2/lib/libncurses.so  
-- Configuring done
-- Generating done
-- Build files have been written to: /build/build
post-installation fixup
shrinking RPATHs of ELF executables and libraries in /nix/store/wwms9qlx94aajs5y212gyfjxzq7mwmrx-ninvaders-0.1.2-configurePhase
shrinking /nix/store/wwms9qlx94aajs5y212gyfjxzq7mwmrx-ninvaders-0.1.2-configurePhase/build/CMakeFiles/3.21.2/CompilerIdC/a.out
shrinking /nix/store/wwms9qlx94aajs5y212gyfjxzq7mwmrx-ninvaders-0.1.2-configurePhase/build/CMakeFiles/3.21.2/CMakeDetermineCompilerABI_C.bin
strip is /nix/store/a4mmjm3bblxwp8h53bcfx3dly80ib0ba-binutils-2.35.1/bin/strip
patching script interpreter paths in /nix/store/wwms9qlx94aajs5y212gyfjxzq7mwmrx-ninvaders-0.1.2-configurePhase
checking for references to /build/ in /nix/store/wwms9qlx94aajs5y212gyfjxzq7mwmrx-ninvaders-0.1.2-configurePhase...
copying 1 paths...
copying path '/nix/store/wwms9qlx94aajs5y212gyfjxzq7mwmrx-ninvaders-0.1.2-configurePhase' from 'ssh://laptop3'...
these 8 derivations will be built:
  /nix/store/26w6vm64wwwdj1c2vylbryxf5i6m6qvf-compileobject-aliens.c.drv
  /nix/store/3439k745qvj7isr847sxl710zpmfdl8r-compileobject-nInvaders.c.drv
  /nix/store/ix6h9xfmpnjx2mwnrfcbypnx5wvmrk0y-compileobject-highscore.c.drv
  /nix/store/n7zzlrlv0lfg7sywp01aryknsy7dw47j-compileobject-ufo.c.drv
  /nix/store/wmfr6nqp8iamdndbyr55z08janynfy79-compileobject-globals.c.drv
  /nix/store/xga02lbklrqlhq6dyvl2n0vqjh9hcjh1-compileobject-view.c.drv
  /nix/store/zg0r82kghs4madc8bg2llznj2f1cpfk5-compileobject-player.c.drv
  /nix/store/83phhkx13ww324khm4bippml36dpf62f-ninvaders-0.1.2.drv
building '/nix/store/26w6vm64wwwdj1c2vylbryxf5i6m6qvf-compileobject-aliens.c.drv' on 'ssh://laptop3'...
building '/nix/store/wmfr6nqp8iamdndbyr55z08janynfy79-compileobject-globals.c.drv'...
building '/nix/store/ix6h9xfmpnjx2mwnrfcbypnx5wvmrk0y-compileobject-highscore.c.drv'...
copying 0 paths...
/nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc -std=gnu11 -MD -MT /nix/store/w93jrqj7gsaiqkmnbw0xa2nb8j7r7iga-compileobject-aliens.c/ninvaders.dir/aliens.c.o -MF /nix/store/w93jrqj7gsaiqkmnbw0xa2nb8j7r7iga-compileobject-aliens.c/ninvaders.dir/aliens.c.o.d -o /nix/store/w93jrqj7gsaiqkmnbw0xa2nb8j7r7iga-compileobject-aliens.c/ninvaders.dir/aliens.c.o -c /build/source/aliens.c
/nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc -std=gnu11 -MD -MT /nix/store/hnlgzqvkalh54xnj77dr74qij9g3498w-compileobject-globals.c/ninvaders.dir/globals.c.o -MF /nix/store/hnlgzqvkalh54xnj77dr74qij9g3498w-compileobject-globals.c/ninvaders.dir/globals.c.o.d -o /nix/store/hnlgzqvkalh54xnj77dr74qij9g3498w-compileobject-globals.c/ninvaders.dir/globals.c.o -c /build/source/globals.c
/nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc -std=gnu11 -MD -MT /nix/store/bd3gjrq7w6g9n7msswrgbi2863zzczjg-compileobject-highscore.c/ninvaders.dir/highscore.c.o -MF /nix/store/bd3gjrq7w6g9n7msswrgbi2863zzczjg-compileobject-highscore.c/ninvaders.dir/highscore.c.o.d -o /nix/store/bd3gjrq7w6g9n7msswrgbi2863zzczjg-compileobject-highscore.c/ninvaders.dir/highscore.c.o -c /build/source/highscore.c
/build/source/globals.c: In function 'waitForReturn':
/build/source/globals.c:69:3: warning: ignoring return value of 'fgets' declared with attribute 'warn_unused_result' [-Wunused-result]
   69 |   fgets(b, sizeof(b), stdin);
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~
/build/source/highscore.c: In function 'readHighScore':
/build/source/highscore.c:166:5: warning: ignoring return value of 'fscanf' declared with attribute 'warn_unused_result' [-Wunused-result]
  166 |     fscanf(fp_HighScore, "%[^\n]\nv%[^\n]\n\n", hs_id, hs_version);
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/build/source/highscore.c:181:5: warning: ignoring return value of 'fscanf' declared with attribute 'warn_unused_result' [-Wunused-result]
  181 |     fscanf(fp_HighScore, "beginner\n");
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/build/source/highscore.c:183:5: warning: ignoring return value of 'fscanf' declared with attribute 'warn_unused_result' [-Wunused-result]
  183 |     fscanf(fp_HighScore, "normal\n");
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/build/source/highscore.c:185:5: warning: ignoring return value of 'fscanf' declared with attribute 'warn_unused_result' [-Wunused-result]
  185 |     fscanf(fp_HighScore, "expert\n");
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/build/source/highscore.c: In function 'fget_HighScoreData':
/build/source/highscore.c:141:5: warning: ignoring return value of 'fscanf' declared with attribute 'warn_unused_result' [-Wunused-result]
  141 |     fscanf(fp, "%i %s\n", &hs_e->score, hs_e->name);
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/build/source/highscore.c:144:3: warning: ignoring return value of 'fscanf' declared with attribute 'warn_unused_result' [-Wunused-result]
  144 |   fscanf(fp, "\n");
      |   ^~~~~~~~~~~~~~~~
building '/nix/store/3439k745qvj7isr847sxl710zpmfdl8r-compileobject-nInvaders.c.drv'...
building '/nix/store/zg0r82kghs4madc8bg2llznj2f1cpfk5-compileobject-player.c.drv'...
copying 1 paths...
copying path '/nix/store/w93jrqj7gsaiqkmnbw0xa2nb8j7r7iga-compileobject-aliens.c' from 'ssh://laptop3'...
building '/nix/store/n7zzlrlv0lfg7sywp01aryknsy7dw47j-compileobject-ufo.c.drv' on 'ssh://laptop3'...
waiting for a machine to build '/nix/store/xga02lbklrqlhq6dyvl2n0vqjh9hcjh1-compileobject-view.c.drv'...
/nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc -std=gnu11 -MD -MT /nix/store/hr7fcqp4kfl4lba9kzdjb0cisxx1mgnq-compileobject-nInvaders.c/ninvaders.dir/nInvaders.c.o -MF /nix/store/hr7fcqp4kfl4lba9kzdjb0cisxx1mgnq-compileobject-nInvaders.c/ninvaders.dir/nInvaders.c.o.d -o /nix/store/hr7fcqp4kfl4lba9kzdjb0cisxx1mgnq-compileobject-nInvaders.c/ninvaders.dir/nInvaders.c.o -c /build/source/nInvaders.c
/nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc -std=gnu11 -MD -MT /nix/store/mgb2zjqqvx7sslj6q97qr1hh0nz47924-compileobject-player.c/ninvaders.dir/player.c.o -MF /nix/store/mgb2zjqqvx7sslj6q97qr1hh0nz47924-compileobject-player.c/ninvaders.dir/player.c.o.d -o /nix/store/mgb2zjqqvx7sslj6q97qr1hh0nz47924-compileobject-player.c/ninvaders.dir/player.c.o -c /build/source/player.c
copying 0 paths...
/nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc -std=gnu11 -MD -MT /nix/store/zzhckm02vyn1dcmibdasvwgc09434hf3-compileobject-ufo.c/ninvaders.dir/ufo.c.o -MF /nix/store/zzhckm02vyn1dcmibdasvwgc09434hf3-compileobject-ufo.c/ninvaders.dir/ufo.c.o.d -o /nix/store/zzhckm02vyn1dcmibdasvwgc09434hf3-compileobject-ufo.c/ninvaders.dir/ufo.c.o -c /build/source/ufo.c
copying 1 paths...
copying path '/nix/store/zzhckm02vyn1dcmibdasvwgc09434hf3-compileobject-ufo.c' from 'ssh://laptop3'...
building '/nix/store/xga02lbklrqlhq6dyvl2n0vqjh9hcjh1-compileobject-view.c.drv' on 'ssh://laptop3'...
copying 0 paths...
/nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc -std=gnu11 -MD -MT /nix/store/bkycyc4hy0j73lh7n8548ls3037b73f6-compileobject-view.c/ninvaders.dir/view.c.o -MF /nix/store/bkycyc4hy0j73lh7n8548ls3037b73f6-compileobject-view.c/ninvaders.dir/view.c.o.d -o /nix/store/bkycyc4hy0j73lh7n8548ls3037b73f6-compileobject-view.c/ninvaders.dir/view.c.o -c /build/source/view.c
copying 1 paths...
copying path '/nix/store/bkycyc4hy0j73lh7n8548ls3037b73f6-compileobject-view.c' from 'ssh://laptop3'...
building '/nix/store/83phhkx13ww324khm4bippml36dpf62f-ninvaders-0.1.2.drv' on 'ssh://laptop3'...
copying 4 paths...
copying path '/nix/store/bd3gjrq7w6g9n7msswrgbi2863zzczjg-compileobject-highscore.c' to 'ssh://laptop3'...
copying path '/nix/store/hnlgzqvkalh54xnj77dr74qij9g3498w-compileobject-globals.c' to 'ssh://laptop3'...
copying path '/nix/store/hr7fcqp4kfl4lba9kzdjb0cisxx1mgnq-compileobject-nInvaders.c' to 'ssh://laptop3'...
copying path '/nix/store/mgb2zjqqvx7sslj6q97qr1hh0nz47924-compileobject-player.c' to 'ssh://laptop3'...
obj /nix/store/w93jrqj7gsaiqkmnbw0xa2nb8j7r7iga-compileobject-aliens.c
obj /nix/store/hnlgzqvkalh54xnj77dr74qij9g3498w-compileobject-globals.c
obj /nix/store/bd3gjrq7w6g9n7msswrgbi2863zzczjg-compileobject-highscore.c
obj /nix/store/hr7fcqp4kfl4lba9kzdjb0cisxx1mgnq-compileobject-nInvaders.c
obj /nix/store/mgb2zjqqvx7sslj6q97qr1hh0nz47924-compileobject-player.c
obj /nix/store/zzhckm02vyn1dcmibdasvwgc09434hf3-compileobject-ufo.c
obj /nix/store/bkycyc4hy0j73lh7n8548ls3037b73f6-compileobject-view.c
linking ninvaders
/nix/store/s5hkav7whndbfz0szshpb46h4idqdq9a-gcc-wrapper-10.3.0/bin/gcc CMakeFiles/ninvaders.dir/aliens.c.o CMakeFiles/ninvaders.dir/globals.c.o CMakeFiles/ninvaders.dir/highscore.c.o CMakeFiles/ninvaders.dir/nInvaders.c.o CMakeFiles/ninvaders.dir/player.c.o CMakeFiles/ninvaders.dir/ufo.c.o CMakeFiles/ninvaders.dir/view.c.o -o ninvaders  /nix/store/jzjqfff4cldlm2wpld313a2s1v6r3ycb-ncurses-6.2/lib/libncurses.so /nix/store/jzjqfff4cldlm2wpld313a2s1v6r3ycb-ncurses-6.2/lib/libform.so 
'ninvaders' -> '/nix/store/377wkpx4qf2dzsar0hjaxfcs5h83cxdn-ninvaders-0.1.2/bin/ninvaders'
copying 1 paths...
copying path '/nix/store/377wkpx4qf2dzsar0hjaxfcs5h83cxdn-ninvaders-0.1.2' from 'ssh://laptop3'...
/nix/store/377wkpx4qf2dzsar0hjaxfcs5h83cxdn-ninvaders-0.1.2

./result/bin/ninvaders
3 Likes

i tried to use this, to compile a large package, with 2 hours compile time

the problem is that the compile derivations use the files generated by configurePhase as their input,
so every time I change configurePhase, the *whole* cache becomes invalid

so, i would have to do this for every compile object:

  1. resolve include paths (recursive)
  2. copy only the needed source files into a separate derivation
  3. use only that derivation as source to compile the object

i give up at this point, death by complexity

much simpler to use ccache, mounted into the build sandbox,
see ccache in nixos wiki

for completeness, here is my broken objcacheStdenv.mkDerivation

{ lib
, stdenv
, python3
, python3Packages
, gcc
}:

let

# WONTFIX the *whole* cache becomes invalid when configurePhase is re-evaluated,
# because every "compileobject" derivation sees the full source tree,
# not just the few files that are actually used
# -> we would have to trace included files,
# and split the source ...
# or, use absolute include paths to /nix/store/...
# instead of /build/...
# -> just use ccache https://nixos.wiki/wiki/CCache

# TODO make nix more silent with remote builders
# waiting for a machine to build '/nix/store/3kj4p5339sdhks6j90w0lr90ss2xlxpz-compileobject-qtheaders.cpp.drv'...
# is printed repeatedly for ALL compile objects -> noise
# https://github.com/NixOS/nix/issues/5876

# Derivation name for the attribute set `attrs`: an explicit `name` wins,
# otherwise the name is composed as "<pname>-<version>".
# Modelled on nixpkgs/pkgs/stdenv/generic/make-derivation.nix,
# minus the staticMarker and hostSuffix parts.
nameOfAttrs = attrs:
  attrs.name or "${attrs.pname}-${attrs.version}"
;

pkgsNew = {

  # objcacheStdenv
  # drop-in replacement for stdenv
  #   a: stdenv.mkDerivation
  #   b: objcacheStdenv.mkDerivation
  # split compilation into many derivations
  # cache every compile object in the nix store
  # based on https://discourse.nixos.org/t/distributed-nix-build-split-large-package-into-many-derivations/15979
  # limitations:
  #   only works with cmake

  objcacheStdenv.mkDerivation = (attrs:
    let
      # Source stage: the caller's derivation with configure, check, install,
      # fixup and dist disabled. Its buildPhase moves the content of /build
      # to $out/build and records the source directory name in
      # $out/sourceRoot.txt. Phases that are not overridden here keep
      # whatever `attrs` and stdenv define for them.
      objcacheSource =
        let
          sourceOverrides = {
            name = nameOfAttrs attrs + "-objcacheSource";
            outputs = [ "out" ];
            postPhases = "";

            dontConfigure = true;
            dontCheck = true;
            dontInstall = true;
            dontFixup = true;
            dontDist = true;

            # the build phase is the only custom phase of this stage
            dontBuild = false;
            buildPhase = ''
              echo "objcacheSource buildPhase: move /build/* to $out"
              mkdir -p $out/build
              mv /build/* $out/build/
              echo "objcacheSource buildPhase: sourceRoot = $sourceRoot"
              printf "%s" "$sourceRoot" >$out/sourceRoot.txt
            '';
          };
        in
        stdenv.mkDerivation (attrs // sourceOverrides);

      sourceRoot = builtins.readFile "${objcacheSource}/sourceRoot.txt";
      #sourcePath = /* lib.traceValSeq */ "${objcacheConfig}/${sourceRoot}";
      #sourcePath = /* lib.traceValSeq */ objcacheSource;

      # TODO merge objcacheSource and objcacheConfig?
      # objcacheConfig: create build folder for out-of-tree build
      # Configure stage: the caller's derivation, but with a source tree made
      # of symlinks into objcacheSource. It runs the caller's configure step,
      # then a buildPhase that only generates compile_commands.json via
      # compiledb and copies the whole /build directory to $out/build.
      objcacheConfig = stdenv.mkDerivation (
        attrs // {
          name = (nameOfAttrs attrs) + "-objcacheConfig";
          # this drv has no source
          # we dont need write access -> avoid copying files
          #dontUnpack = true; # -> ignore src https://github.com/NixOS/nixpkgs/issues/23099
          outputs = [ "out" ];
          unpackPhase = ''
            #ln -s ${objcacheSource}/build/${sourceRoot} /build/
            mkdir /build/${sourceRoot}
            echo "objcacheConfig unpackPhase: symlink ${objcacheSource} to /build"
            echo "NOTE sources are read-only"
            shopt -s dotglob # also symlink hidden files
            ln -s ${objcacheSource}/build/${sourceRoot}/* /build/${sourceRoot}/
            echo "objcacheConfig unpackPhase: ls /build"; ls /build
          '';
          dontPatch = true;
          # the custom unpackPhase does not set sourceRoot itself, so pass
          # the value recorded by objcacheSource
          inherit sourceRoot;
          # only run configurePhase
          dontBuild = false;
          dontCheck = true;
          dontInstall = true;
          dontFixup = true;
          dontDist = true;
          postPhases = "";
          # `attrs` may come without nativeBuildInputs: fall back to an empty
          # list instead of aborting the evaluation on a missing attribute
          nativeBuildInputs = (attrs.nativeBuildInputs or [ ]) ++ [
            python3
            python3Packages.compiledb
          ];
          buildPhase = ''
            echo "objcacheConfig buildPhase"
            #set -o xtrace

            #stat Makefile

            # Makefile -> compile_commands.json
            compiledb -n make

            #stat compile_commands.json

            #cat compile_commands.json; exit 1

            #stat /build/${sourceRoot}/*
            # should be symlink
            # why "mv" fails to move?

            #echo "objcacheConfig buildPhase: ls /build"; ls -l /build
            #echo "objcacheConfig buildPhase: ls /build/${sourceRoot}"; ls -l /build/${sourceRoot}

            #mkdir $out
            #mv /build/* $out/
            # FIXME mv: cannot remove '/build/qtbase-everywhere-src-6.2.2/build/lib/cmake/Qt6/3rdparty/extra-cmake-modules/modules/ECMFindModuleHelpers.cmake': Permission denied

            #mkdir -p $out/${sourceRoot}
            #mv /build/${sourceRoot}/* $out/${sourceRoot}/
            # FIXME mv: cannot remove '/build/qtbase-everywhere-src-6.2.2/build/lib/cmake/Qt6/3rdparty/extra-cmake-modules/modules/ECMFindModuleHelpers.cmake': Permission denied

            mkdir $out
            cp -ra /build $out/
          '';
        }
      );

      # TODO list or attrs?
      # List of derivations, one per entry of the compile_commands.json
      # produced by objcacheConfig. Reading that file requires objcacheConfig
      # to be built while this expression is evaluated.
      compileObjectList = (
        let
          # the cmake build directory inside the output of objcacheConfig
          configureResultDir = "${objcacheConfig}/build/${sourceRoot}/build";
          compileCommands = /* lib.traceValSeq */ (builtins.fromJSON (builtins.readFile "${configureResultDir}/compile_commands.json"));

          # Build the derivation that runs a single compile command.
          #   commandIdx: index of the command (only used by the commented-out name)
          #   command:    one entry of compile_commands.json; the attributes
          #               `file`, `directory` and `arguments` are read
          # Everything the command writes below CMakeFiles/<target>.dir/ is
          # redirected to $out/CMakeFiles/<target>.dir/.
          compileObjectOfCommand = commandIdx: command: (stdenv.mkDerivation {
            # avoid using objcacheConfig pname and version?
            # re-use the compile-object across different versions (and pnames)
            #name = "${objcacheConfig.pname}-${objcacheConfig.version}-obj${builtins.toString commandIdx}";
            #name = "compileobject-${builtins.baseNameOf command.file}";
            name = "objcache-${builtins.baseNameOf command.file}";
            # FIXME use only the needed inputs. avoid recompile when buildInputs change
            #inherit (objcacheConfig) buildInputs propagatedBuildInputs;
            inherit (objcacheSource) buildInputs propagatedBuildInputs;
            nativeBuildInputs = [ gcc ]; # note: no cmake
            dontUnpack = true; # no source: avoid file copy

            # note: use objcacheConfig as source -> access files produced in configurePhase
            buildCommand = ''
              ln -s ${objcacheConfig}/build/${sourceRoot} /build/${sourceRoot}
              mkdir /build/build
              # CMakeFiles is located in the cmake build directory below the
              # source root, the same directory that compile_commands.json is read from
              ln -s ${configureResultDir}/CMakeFiles /build/build/

              cd /build/build
              argsRaw=(${lib.escapeShellArgs command.arguments})

              # debug
              if false; then
              echo "command.directory = ${command.directory}"
              echo "command.file = ${command.file}"
              echo "command.arguments = ''${argsRaw[@]}"
              fi

              args=()
              for a in "''${argsRaw[@]}"; do
                if (echo "$a" | grep -E '^CMakeFiles/([^/]+\.dir)/' >/dev/null); then
                  args+=("$out/$a")
                  outDir="$(dirname "$a")"
                  outPath="$out/$outDir"
                  #echo "file ${command.file} -> a = $a"
                  #echo "file ${command.file} -> outDir = $outDir"
                  #echo "file ${command.file} -> outPath = $outPath"
                  if [ ! -d "$outPath" ]; then mkdir -p "$outPath"; fi
                else
                  args+=("$a")
                fi
              done

              # compile
              "''${args[@]}" || {
                echo "compile failed"
                echo "command.directory = ${command.directory}"
                echo "command.file = ${command.file}"
                echo "command.arguments = ''${argsRaw[@]}"
                echo "objcacheSource = ${objcacheSource}"
                echo "objcacheConfig = ${objcacheConfig}"
                echo "arguments:"
                echo "''${args[@]}"
                exit 1
              }
            '';
            # TODO tolerate compile errors? no need to be perfect,
            # just compile "most" objects to populate cmake cache
          });

          # cmake file-API reply. These bindings are not used by the result
          # expression below, so they are never evaluated.
          # NOTE(review): no file-API query is created anywhere in the stages
          # visible here, so reply/index.json presumably does not exist - confirm
          # before using these values.
          cmakeReplyDir = "${configureResultDir}/.cmake/api/v1/reply";
          cmakeIndex = builtins.fromJSON (builtins.readFile "${cmakeReplyDir}/index.json");
          cmakeCodemodel = builtins.fromJSON (builtins.readFile "${cmakeReplyDir}/${cmakeIndex.reply.codemodel-v2.jsonFile}");
          # TODO multiple configurations?
          cmakeConfiguration = (builtins.elemAt cmakeCodemodel.configurations 0);
          #cmakeConfiguration.directories[0].jsonFile
          #cmakeConfiguration.projects[0].name == "ninvaders"
          targets = builtins.map (target: builtins.fromJSON (builtins.readFile "${cmakeReplyDir}/${target.jsonFile}")) cmakeConfiguration.targets;
        in
        lib.imap0 compileObjectOfCommand compileCommands
      );

      # Final stage: the caller's derivation with a replaced unpackPhase.
      # Instead of unpacking `src`, it copies the configured tree of
      # objcacheConfig to /build and then copies the content of every
      # compile-object derivation into the cmake build directory.
      buildDrv =
        let
          buildOverrides = {
            inherit sourceRoot;
            prePhases = "";
            dontPatch = true;
            # TODO skip configurePhase? patch output paths ...
            # for this, we must keep the original outputs in the first drv,
            # and create dummy outputs to appease nix
            unpackPhase = ''
              cp -r ${objcacheConfig}/build/* /build/
              chmod -R +w /build

              # debug
              #echo "buildDrv: ls /build"; ls /build
              #echo "buildDrv: ls /build/${sourceRoot}"; ls /build/${sourceRoot}
              #echo "buildDrv: ls /build/${sourceRoot}/build"; ls /build/${sourceRoot}/build

              objList=()
              ${lib.concatMapStringsSep "\n" (obj: ''objList+=("${obj}")'') compileObjectList}
              for o in "''${objList[@]}"; do
                echo "objcache load $o/"

                # debug
                #( cd $o && find . -type f )
                #( cd $o && find . -name '*.d' -exec stat '{}' \; )

                #cp -rs $o/* /build/${sourceRoot}/build/CMakeFiles/
                #cp -rs $o/* /build/${sourceRoot}/build/ # TODO symlink only *.o files
                cp -r $o/* /build/${sourceRoot}/build/ # no symlink, we need write access to *.d files
                chmod -R +w /build/${sourceRoot}/build/
                # FIXME CMakeFiles/cue.dir/cd.c.o.d file must be writable for make
              done

            '';
          };
        in
        stdenv.mkDerivation (attrs // buildOverrides);
    in
    buildDrv
  );
};

in pkgsNew.objcacheStdenv
2 Likes