Hi,
I’m building a NixOS system for an appliance as a QEMU disk image and I’m having issues with tmpfiles not being applied (or wrongly applied, or a race condition, I really don’t know).
My builder is largely inspired by the make-disk-image utility provided by nixpkgs, but I wrote something different since I need two disks and btrfs.
Some context
Basically my image builder looks like this:
# Appliance image builder: produces two qcow2 disk images (root and data) and a
# launcher script, by partitioning, formatting and installing NixOS from inside
# the build VM started through vmTools.runInLinuxVM.
{
  lib,
  substitute,
  closureInfo,
  runCommand,
  stdenv,
  vmTools,
  writeShellApplication,
  btrfs-progs,
  gptfdisk,
  nix,
  nixos-enter,
  parted,
  qemu_kvm,
  util-linux,
  inputs,
  appliancePkgs,
}:
let
  name = "my-appliance";
  version = "1.0.0";

  # File names of the disk images. The raw files are the ones attached to the
  # build VM; the qcow2 files are the converted results written to $out.
  rootRaw = "${name}.raw";
  rootQcow = "${name}.qcow2";
  dataRaw = "${name}.data.raw";
  dataQcow = "${name}.data.qcow2";

  # Evaluated NixOS configuration of the appliance (modules live in ./config).
  nixosConfig =
    (inputs.nixpkgs.lib.nixosSystem {
      inherit (stdenv.hostPlatform) system;
      modules = [ ./config ];
      specialArgs = {
        inherit inputs appliancePkgs;
        applianceVersion = version;
      };
    }).config;

  # The system closure to install; used both for the store registration and
  # as the `--system` argument of nixos-install.
  toplevel = nixosConfig.system.build.toplevel;

  # Tools needed outside the VM (qemu-img in preVM/postVM).
  hostToolsPath = lib.makeBinPath [ qemu_kvm ];

  # Store-path registration data for everything reachable from `toplevel`.
  systemClosure = closureInfo {
    rootPaths = [ toplevel ];
  };

  # ./script.sh with @IMAGE_NAME@ replaced by the root qcow2 file name.
  launcherSource = substitute {
    src = ./script.sh;
    substitutions = [
      "--replace-fail"
      "@IMAGE_NAME@"
      rootQcow
    ];
  };

  # `run-appliance` executable wrapping the substituted script.
  launcher = writeShellApplication {
    name = "run-appliance";
    text = builtins.readFile launcherSource;
  };

  # Step 1: GPT partition tables with fixed GUIDs.
  #   /dev/vda: partition 1 flagged bios_grub (1MiB-2MiB), partition 2 for btrfs.
  #   /dev/vdb: a single partition spanning the whole disk.
  # NOTE(review): the --disk-guid passed for /dev/vda has only 11 hex digits in
  # its last group, whereas every other GUID here has 12. Confirm the intended value.
  partitionDisks = ''
    parted --script /dev/vda -- \
    mklabel gpt \
    mkpart no-fs 1MiB 2MiB \
    set 1 bios_grub on \
    align-check optimal 1 \
    mkpart primary btrfs 2MiB -1MiB \
    align-check optimal 2 \
    print
    sgdisk \
    --disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C \
    --partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \
    --partition-guid=2:970C694F-AFD0-4B99-B750-CDB7A329AB6F \
    /dev/vda
    sfdisk --dump /dev/vda
    parted --script /dev/vdb -- \
    mklabel gpt \
    mkpart primary btrfs 0% 100% \
    align-check optimal 1 \
    print
    sgdisk \
    --disk-guid=4748F60F-50C6-4ECC-A0FD-4AD8ABC00BAB \
    --partition-guid=1:B1D8B29E-44CC-4E14-B52F-16DB00547D91 \
    /dev/vdb
    sfdisk --dump /dev/vdb
    lsblk -o NAME,SIZE,TYPE,FSTYPE,LABEL
  '';

  # Step 2: btrfs filesystems and their subvolumes.
  #   "root" on /dev/vda2: @root, @nix
  #   "data" on /dev/vdb1: @machine-ssh, @nixos-state, @systemd-state,
  #                        @log-journal, @home
  # Each filesystem is mounted on a scratch directory only long enough to
  # create the subvolumes, then unmounted again.
  makeFilesystems = ''
    mkfs.btrfs -L "root" /dev/vda2
    mkdir -p /mnt/vda
    mount /dev/vda2 /mnt/vda
    btrfs subvolume create /mnt/vda/@root
    btrfs subvolume create /mnt/vda/@nix
    btrfs subvolume list /mnt/vda
    umount /mnt/vda
    rmdir /mnt/vda
    mkfs.btrfs -L "data" /dev/vdb1
    mkdir -p /mnt/vdb
    mount /dev/vdb1 /mnt/vdb
    btrfs subvolume create /mnt/vdb/@machine-ssh
    btrfs subvolume create /mnt/vdb/@nixos-state
    btrfs subvolume create /mnt/vdb/@systemd-state
    btrfs subvolume create /mnt/vdb/@log-journal
    btrfs subvolume create /mnt/vdb/@home
    btrfs subvolume list /mnt/vdb
    umount /mnt/vdb
    rmdir /mnt/vdb
  '';

  # Step 3: assemble the target tree under /mnt. @root is mounted first, then
  # the mount points are created inside it and the remaining subvolumes are
  # mounted on top.
  mountTargetTree = ''
    mkdir -p /mnt
    mount -t btrfs -o subvol=@root /dev/vda2 /mnt
    mkdir -p /mnt/nix /mnt/var/lib/nixos /mnt/var/lib/systemd /mnt/var/log/journal /mnt/home /mnt/etc/ssh
    mount -t btrfs -o subvol=@nix /dev/vda2 /mnt/nix
    mount -t btrfs -o subvol=@machine-ssh /dev/vdb1 /mnt/etc/ssh
    mount -t btrfs -o subvol=@nixos-state /dev/vdb1 /mnt/var/lib/nixos
    mount -t btrfs -o subvol=@systemd-state /dev/vdb1 /mnt/var/lib/systemd
    mount -t btrfs -o subvol=@log-journal /dev/vdb1 /mnt/var/log/journal
    mount -t btrfs -o subvol=@home /dev/vdb1 /mnt/home
    ls -la /mnt
  '';

  # Step 4: load the closure registration into a Nix database under $TMPDIR,
  # install the prebuilt system into /mnt with the bootloader step skipped,
  # then run `switch-to-configuration boot` inside the target with
  # NIXOS_INSTALL_BOOTLOADER=1, and finally unmount everything.
  installNixos = ''
    export NIX_STATE_DIR=$TMPDIR/state
    nix-store --load-db < ${systemClosure}/registration
    chmod 755 "$TMPDIR"
    nixos-install \
    --root /mnt \
    --no-bootloader \
    --no-root-passwd \
    --system ${toplevel} \
    --no-channel-copy \
    --substituters ""
    export HOME=$TMPDIR
    NIXOS_INSTALL_BOOTLOADER=1 nixos-enter --root /mnt -- /nix/var/nix/profiles/system/bin/switch-to-configuration boot
    umount -R /mnt
  '';

  # Full script executed by the build, in order.
  vmScript = partitionDisks + makeFilesystems + mountTargetTree + installNixos;
in
vmTools.runInLinuxVM (
  runCommand name
    {
      memSize = 1024;

      # Tools made available to the build script.
      buildInputs = [
        btrfs-progs
        gptfdisk
        nix
        nixos-enter
        parted
        nixosConfig.system.build.nixos-install
        util-linux
      ];

      # Create the empty raw disks (20G root, 50G data) that get attached below.
      preVM = ''
        PATH=${hostToolsPath}:$PATH
        mkdir -p $out
        qemu-img create -f raw ${rootRaw} 20G
        qemu-img create -f raw ${dataRaw} 50G
      '';

      # One virtio drive per raw image, root first and data second; the script
      # addresses them as /dev/vda and /dev/vdb.
      QEMU_OPTS = lib.concatMapStringsSep " " (image: "-drive if=virtio,file=${image},format=raw") [
        rootRaw
        dataRaw
      ];

      # Convert both raw disks to compressed qcow2 in $out and install the launcher.
      postVM = ''
        qemu-img convert -f raw -O qcow2 -c ${rootRaw} $out/${rootQcow} -m $(nproc) -W
        qemu-img convert -f raw -O qcow2 -c ${dataRaw} $out/${dataQcow} -m $(nproc) -W
        mkdir -p $out/bin
        install -m0555 ${lib.getExe launcher} $out/bin
      '';

      # Expose the evaluated config and the launcher on the resulting derivation.
      passthru = {
        config = nixosConfig;
        run = launcher;
      };
    }
    vmScript
)
Now this works fine, and can load up the appliance NixOS configuration to build two qcow2 images:
- my-appliance.qcow2: main disk (root) where NixOS is installed (it has two btrfs subvolumes, @root and @nix, mounted at / and /nix respectively)
- my-appliance.data.qcow2: secondary disk (data) where persistent data should be written (it has several btrfs subvolumes for /home, /etc/ssh, /var/lib/systemd, /var/log/journal and /var/lib/nixos)
The idea is that I can run a preconfigured NixOS image with a separate data disk on any system that can run QEMU (basically anything from Linux and macOS to Windows), and freely replace the root disk whenever I update the system without disrupting the user and system data that should be persisted.
The NixOS config is a bit huge and not publicly available, but basically it:
- configures a GNOME DE with GNOME RDP enabled (not configured yet, I currently use QEMU VNC window to test the system)
- runs on Wayland
- sets up some basic programs/services (zsh, starship, git, podman, chromium, firefox, nerd fonts, node, java, go, vscode, intellij, …)
- disables some irrelevant defaults for an appliance (nix docs since there is no nix in the final system, dlna, power profiles, bluetooth, thunderbolt support, geolocation services, fstrim, some GNOME apps, and more…)
I don’t think the NixOS configuration is the culprit here, but I may be wrong.
The issues
Now on to the issues I’m having. They are mostly related to tmpfiles. There are two issues, for which I found a fix but it feels more like a band-aid, hence this post.
Avahi daemon
The first issue is with Avahi daemon (which is, if I’m right, somehow required by GNOME to work properly). When I start the system for the first time, the avahi daemon is complaining that it can’t create its runtime directory:
Failed to create runtime directory /run/avahi-daemon/
If I restart the system, the daemon can find its directory and starts normally, along with the rest of the system.
I fixed this by forcing the systemd-tmpfiles-resetup service to run before the avahi-daemon service:
{
  # Make avahi-daemon both require systemd-tmpfiles-resetup.service and be
  # ordered after it.
  systemd.services.avahi-daemon =
    let
      tmpfilesUnits = [ "systemd-tmpfiles-resetup.service" ];
    in
    {
      requires = tmpfilesUnits;
      after = tmpfilesUnits;
    };
}
And now it works flawlessly, even on first boot.
XWayland
The second issue is with XWayland. After fixing the Avahi issue, I’m dropped into GDM, where I cannot interact with the UI at all. Again, if I restart the system it works…
Looking at the logs, the issue is once again related to tmpfiles, because XWayland is complaining that there are incorrect permissions on the /tmp/.X11-unix directory:
failed to start x wayland: wrong ownership for directory "/tmp/.X11-unix"
Indeed, the directory belongs to gdm:gdm on first start. But on the second start, it belongs to root:root and therefore x wayland runs fine, I can connect normally to my user and be dropped in a working GNOME shell Wayland session with all my programs set up and working fine.
Once again, I fixed this with a band-aid that doesn’t feel right:
{
  # Extra tmpfiles.d rule: /tmp/.X11-unix as a directory, mode 1777, owned by
  # root:root, with "-" in the age field.
  systemd.tmpfiles.rules = [ "d /tmp/.X11-unix 1777 root root -" ];
}
This doesn’t feel right because this directory is already (or at least should be) created by the x11.conf tmpfiles.d file that already exists in the filesystem:
# This file is part of systemd.
#
# systemd is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
# See tmpfiles.d(5) for details
# Make sure these are created by default so that nobody else can
# or empty them at startup
D! /tmp/.X11-unix 1777 root root 10d
D! /tmp/.ICE-unix 1777 root root 10d
D! /tmp/.XIM-unix 1777 root root 10d
D! /tmp/.font-unix 1777 root root 10d
# Unlink the X11 lock files
r! /tmp/.X[0-9]*-lock
Conclusion
Now, I “fixed” both of these issues with some band-aids, but it just feels wrong that I should have to do this.
I’m pretty sure the NixOS configuration is not the culprit here, but the way I’m building the image is. However, I don’t see what could be the root cause, since in system logs I can see the systemd-tmpfiles-resetup service being run early on (well before avahi-daemon or GNOME session starts), even on the first boot.
Any help on this would be greatly appreciated! I can share parts of the system config if that’s of any help btw.
Thanks for reading and sorry for the long post.