With my limited knowledge, I’m trying to figure out how to configure a PostgreSQL database in my dev environment, created via a shell.nix file. Currently, in shellHook, I edit postgresql.conf with sed right after calling initdb, but there has to be a better solution, especially if many options need to be configured.
Tried passing the configuration options directly to pg_ctl but they don’t seem to take effect:
this seems to work perfectly.
But these options won’t alter your postgresql.conf.
You need to verify the settings within the Database: e.g. SHOW logging_collector;
I wasn’t expecting postgresql.conf to change, and checking options with postgres -C always returned off, but that was because I didn’t read the postgres man page carefully enough (emphases mine):
-C name
Prints the value of the named run-time parameter, and exits. (See the -c option above for details.) This can be used on a running server, and returns values from postgresql.conf , modified by any parameters supplied in this invocation. It does not reflect parameters supplied when the cluster was started.
Plus, I also checked whether the default log/ directory exists with a logfile, after invoking nix-shell, but I messed that up too. I was playing around with quotes so many times (thinking that they were the problem) that I left single quotes quoted (see first post), and ended up with log directory name "'log'"/ and "'postgresql-2019-09-17_135959.log'"… The correct invocation is
Hey, I do run Postgres using Nix on Ubuntu. My approach is container-based deployment and extra-container tool.
So, extra-container is:
# Nixpkgs overlay packaging the `extra-container` tool (v0.2), patched so it
# also works on a non-NixOS (Ubuntu) host.
self: super: {
  extra-container = super.stdenv.mkDerivation {
    name = "extra-container-amplicare";
    src = super.fetchurl {
      url = "https://github.com/erikarvstedt/extra-container/archive/0.2.tar.gz";
      sha256 = "1gz7rl2hx8kbmy5wdyai75mbscr0v61da4h7paz3j3yzfk6p7wf9";
    };
    # FIX: the mkDerivation attribute is `propagatedBuildInputs`;
    # `propagatedBuildDeps` is not recognized, so nixos-container was silently
    # never propagated to users of this package.
    propagatedBuildInputs = [ self.nixos-container ];
    buildCommand = ''
      unpackPhase && cd "$sourceRoot"
      # extra-container was designed for NixOS. We have to adapt it for Ubuntu:
      # at runtime, pick /etc/systemd-mutable on NixOS and /usr/lib/systemd
      # everywhere else.
      sed -i 's|/etc/systemd-mutable|$((uname -a \|grep -q NixOS) \&\& echo /etc/systemd-mutable\|\| echo /usr/lib/systemd)|g' extra-container
      mkdir -p $out/bin
      cp extra-container $out/bin/extra-container
      patchShebangs $out/bin
    '';
  };
}
The container config is:
# Declarative NixOS container running PostgreSQL 11. The data directory, the
# host's unix-socket directory and the host's snakeoil SSL cert/key are
# bind-mounted in from the Ubuntu host.
let
  # Host paths mapped into the container. All but pg_stat_tmp are bind mounts;
  # pg_stat_tmp is a tmpfs path (statistics data is disposable).
  mounts = {
    pg-monster = {
      mountPoint = "/db";
      hostPath = "/db/postgresql/pg-monster";
      isReadOnly = false;
    };
    # Shared socket dir so host clients can peer-authenticate to the container.
    host-socket = {
      mountPoint = "/var/run/postgresql";
      hostPath = "/var/run/postgresql";
      isReadOnly = false;
    };
    ssl-certificate = {
      mountPoint = "/etc/ssl/certs/ssl-cert-snakeoil.pem";
      hostPath = "/etc/ssl/certs/ssl-cert-snakeoil.pem";
    };
    ssl-key = {
      mountPoint = "/etc/ssl/private/ssl-cert-snakeoil.key";
      hostPath = "/etc/ssl/private/ssl-cert-snakeoil.key";
    };
    pg_stat_tmp = "/run/pg_stat_tmp";
  };
in {
  imports = [ ./nix-config/host-base.nix ];
  containers.pg-monster = {
    autoStart = true;
    tmpfs = [
      mounts.pg_stat_tmp
    ];
    bindMounts = {
      inherit (mounts)
        pg-monster
        host-socket
        ssl-certificate
        ssl-key
        ;
    };
    config = { pkgs, config, lib, ... }: {
      imports = [
        ./nix-config/container-base.nix
      ];
      services.postgresql = {
        enable = true;
        package = pkgs.postgresql_11;
        extraPlugins = with config.services.postgresql.package.pkgs; [
          pg_repack
        ];
        dataDir = mounts.pg-monster.mountPoint;
        port = 5432;
        enableTCPIP = true;
        # Appended verbatim to postgresql.conf.
        extraConfig = ''
          # CONFIG REFERENCE (with defaults)
          # https://github.com/postgres/postgres/blob/40ad4202513c72f5c1beeb03e26dfbc8890770c0/src/backend/utils/misc/postgresql.conf.sample
          listen_addresses = '*'
          unix_socket_directories = '/tmp,${mounts.host-socket.mountPoint}'
          max_connections = 100
          shared_buffers = 10GB
          huge_pages = try
          temp_buffers = 64MB
          work_mem = 64MB
          maintenance_work_mem = 4GB
          autovacuum_work_mem = 128MB
          max_worker_processes = 24
          max_parallel_maintenance_workers = 8
          max_parallel_workers_per_gather = 8
          max_parallel_workers = 8
          synchronous_commit = off
          effective_cache_size = 300GB
          default_statistics_target = 1000
          jit = off # slows done things actually
          logging_collector = on
          log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
          log_min_duration_statement = 1
          log_filename = 'postgresql-%Y-%m-%d.log'
          log_checkpoints = on
          log_connections = on
          log_disconnections = on
          log_lock_waits = on
          log_temp_files = 0
          log_autovacuum_min_duration = 0
          log_error_verbosity = default
          #log_min_messages = debug5
          stats_temp_directory = '${mounts.pg_stat_tmp}'
          ssl = true
          ssl_cert_file = '${mounts.ssl-certificate.mountPoint}'
          ssl_key_file = '${mounts.ssl-key.mountPoint}'
          # those options should minimize WAL generation when inserting data
          # (WAL slows down everything, because it touches disk. But we still
          # cannot go with UNLOGGED tables, because those don't survive reboot)
          wal_level = minimal
          archive_mode = off
          max_wal_senders = 0
          wal_compression = on
          max_wal_size = 4GB
        '';
        # Contents of pg_hba.conf.
        authentication = ''
          # We trust ubuntu user, because he has sudo without password
          # He can connect/dump bypassing this authentication file
          local all postgres peer
          local all root peer
        '';
      };
      # The tmpfs for stats is mounted root-owned; hand it to postgres before
      # the server starts.
      systemd.services.postgresql.preStart = ''
        chown postgres:postgres -R ${mounts.pg_stat_tmp}
      '';
      # After the server is up, ensure a superuser role for root exists and
      # tighten data-directory permissions (750 is allowed since PG 11).
      systemd.services.postgresql.postStart =
        let cfg = config.services.postgresql;
        in lib.mkAfter ''
          set -x
          export PGPORT=${toString cfg.port}
          export PGDATABASE=template1
          SUDO="${pkgs.sudo}/bin/sudo -u ${cfg.superUser} -E"
          # createUser NAME [createuser-flags...] — idempotent role creation.
          function createUser {
            local user="$1"
            shift
            # FIX: quote "$@" so extra createuser flags are not word-split or
            # glob-expanded by the shell.
            $SUDO psql -tAc "SELECT 1 FROM pg_roles WHERE rolname='$user'" | grep -q 1 || $SUDO createuser $user "$@"
          }
          createUser root -s || true
          chmod 750 -R ${cfg.dataDir}
        '';
    };
  };
}
and container base is:
# Shared base configuration for containers: headless, no firewall, with the
# postgres user/group ids aligned to the Ubuntu host.
{ lib, ... }: {
  imports = [
    <nixpkgs/nixos/modules/profiles/headless.nix>
  ];

  networking.firewall.enable = false;
  documentation.nixos.enable = false;

  # Map Ubuntu's `postgres` user to NixOS.
  # We want the `postgres` user in the container to correspond to the
  # `postgres` user on the host, so peer authentication over the shared
  # socket is possible.
  users.users.postgres = {
    uid = lib.mkForce 114;
    # Membership in ssl-cert allows reading the host's SSL private key.
    extraGroups = [ "ssl-cert" ];
  };
  users.groups.postgres.gid = lib.mkForce 120;
  # Map Ubuntu's ssl-cert group id into the container.
  users.groups.ssl-cert.gid = 119;
}
Thank you @danbst, this looks awesome! Barely understand it, and haven’t used containers in NixOS (or at all) but this definitely seems a cleaner solution.
On the other hand, why would you recommend using containers instead of nix-shell? Is it because nix-shell kind of like chroot?
Forgive my ignorance, still fairly new to this, and right now I only use NixOS and Nix for development as setting up a dev environment is a snap, but I assume the proper way to move forward in production is using containers.
Kinda isolation. Which is great for reproducibility
It allows NixOS-style declarative config
Note, that there is another project to extend nix-shell into NixOS config: nixos-shell by Chris. It was nice, but it requires now some love to work on Ubuntu.
It would be really nice to combine extra-container and nixos-shell tools together (ideally embed into stock nixos-container).
Is it because nix-shell kind of like chroot ?
nix-shell is nowhere like chroot. nix-shell --pure is closer.
but I assume the proper way to move forward in production is using containers.
You can run without both containers and systemd if you want. There exists a thing called “runner”:
$ cat runner.nix
let
  # Evaluate a NixOS system around the test configuration.
  # boot.isContainer skips kernel/bootloader machinery the runner won't need.
  eval = import <nixpkgs/nixos> {
    configuration = {
      imports = [ ./configuration-test.nix ];
      boot.isContainer = true;
    };
  };
in
# The standalone runner script derivation for the postgresql unit.
eval.config.systemd.services.postgresql.runner
$ cat configuration-test.nix
# Minimal PostgreSQL service definition used by the standalone runner demo.
{ config, pkgs, ... }: {
  services.postgresql = {
    enable = true;
    dataDir = "/tmp/datadir";
    port = 5431;
  };
  system.stateVersion = "18.09";
}
$ sudo -u postgres $(nix-build runner.nix --no-out-link)
these derivations will be built:
/nix/store/jpgy6m37c9lpzj8pq234410sl8nl3q8n-unit-script-postgresql-post-start.drv
/nix/store/y2md2wq9gw49v74cxh1510lyins4wlxc-postgresql.conf.drv
/nix/store/k3qja76c9jq5wjf2j7vdbiv3qa97ipfk-unit-script-postgresql-start.drv
/nix/store/gvshan2phnxh6p74iigvy50crvdx6dpk-postgresql-runner.drv
building '/nix/store/jpgy6m37c9lpzj8pq234410sl8nl3q8n-unit-script-postgresql-post-start.drv'...
building '/nix/store/y2md2wq9gw49v74cxh1510lyins4wlxc-postgresql.conf.drv'...
building '/nix/store/k3qja76c9jq5wjf2j7vdbiv3qa97ipfk-unit-script-postgresql-start.drv'...
building '/nix/store/gvshan2phnxh6p74iigvy50crvdx6dpk-postgresql-runner.drv'...
perl: warning: Setting locale failed.
perl: warning: Please check that your locale settings:
LANGUAGE = (unset),
LC_ALL = "en_US.UTF-8",
LC_CTYPE = "en_US.UTF-8",
LANG = "en_US.UTF-8"
are supported and installed on your system.
perl: warning: Falling back to the standard locale ("C").
running ExecStartPre: /nix/store/8jr8k2nnhal3rsi75pws0nyjlvafzijb-unit-script-postgresql-pre-start
running ExecStart: /nix/store/n44a8135hq4kp6fvp5dxl67i8y29z6zr-unit-script-postgresql-start
running ExecStartPost: /nix/store/blpjf150p5j98fvm989gk0kp8qk11c2b-unit-script-postgresql-post-start
LOG: database system was shut down at 2019-09-18 19:08:03 GMT
LOG: MultiXact member wraparound protections are now enabled
LOG: database system is ready to accept connections
LOG: autovacuum launcher started
I don’t use it myself.
PS. In general, NixOS (not Nix, NixOS!) experience on Ubuntu is bad. I’ve recorded my current setup in NixOS-like experience on Ubuntu in AWS. So if you find containers difficult to use on Ubuntu, then don’t bother with that and use what is easiest for you.
You just gave me a couple weeks worth of reading, and now I understand the reason behind choosing NixOS containers at least.
On local machines I always use NixOS, but have a couple projects running on Google Cloud Engine, and choose Ubuntu only to get started (quick and dirty) until I figure out NixOps to spin up NixOS instances.
then build NixOS files on that node (nixos-infect covers this)
then replace bootloader and reboot (nixos-infect covers this)
new bootloader does “swap” and launches NixOS (NIXOS_LUSTRATE covers this)
Probably the only important thing is to not forget to imports = [ <nixpkgs/nixos/modules/virtualisation/google-compute-config.nix> ]; in desired NixOS configuration.
I always seem to get only the following lines in the error log:
2020-03-31 09:06:07.980 UTC [15760] LOG: database system was shut down at 2020-03-31 09:05:11 UTC
2020-03-31 09:06:07.994 UTC [15758] LOG: database system is ready to accept connections
2020-03-31 09:09:06.071 UTC [15758] LOG: received fast shutdown request
2020-03-31 09:09:06.135 UTC [15758] LOG: aborting any active transactions
2020-03-31 09:09:06.137 UTC [15758] LOG: background worker "logical replication launcher" (PID 15766) exited with exit code 1
2020-03-31 09:09:06.137 UTC [15761] LOG: shutting down
2020-03-31 09:09:06.536 UTC [15758] LOG: database system is shut down
Also, as per danbst’s suggestion:
POSTGRESUJAH=# show log_min_error_statement ;
log_min_error_statement
-------------------------
debug5
(1 row)
This happens with absolutely 0 user intervention. So for example, I notice a shutdown, then I start the server again and come here to make the post so no shell sessions closed et al, and then that server is dead again.
Were you able to find the solution for this? I wasn’t able to re-create this (that would only happen if I tried it in production, for sure…). Started googling (see below), but none of these offer a solution (and you probably found them already):
Thank you for trying to help. I’ve tried quite a bit but I think nothing was directly applicable and nothing worked for sure. I’m beginning to slowly wonder whether this may have something to do with the locale. I will update here. One other direction I’m looking at is that when I made this post I deleted all the log files, and the server ran for multiple hours flawlessly before I manually shut it down, as opposed to the usual few minutes to maybe an hour. I have to investigate that too.
it is very strange. With DEBUG5 pg should spit out tons of logs, on every query. Are you sure you have logging properly configured?
Line “received fast shutdown request” suggests PG is stopped explicitly, it is not crashed. Check full system log around that time, maybe any clues which other services can do that.
I’ve seen a similar situation when you have, for example, postStart configured and there is an error in postStart. Then PG is started, postStart fails, systemd decides the whole service has failed, so it stops PG. You may have some different issue…
I did do a journalctl earlier and there was nothing in there at all that could be called suspicious. The only logs were from my onedrive service. (I did not check dmesg, I should probably do that, I don’t know much about Linux administration). And there was even a failure just in the time I made an earlier post with no other action than typing in firefox.
About postgres logs themselves, here’s the content that appears in postgres.log:
2020-04-02 10:18:58.218 UTC [11024] LOG: listening on IPv4 address "0.0.0.0", port 7397
2020-04-02 10:18:58.233 UTC [11024] LOG: listening on IPv6 address "::", port 7397
2020-04-02 10:18:58.263 UTC [11024] LOG: listening on Unix socket "/code/zx/workspace/projects/jkjkjkjk/postgres/.s.PGSQL.7397"
2020-04-02 10:18:58.380 UTC [11024] LOG: redirecting log output to logging collector process
2020-04-02 10:18:58.380 UTC [11024] HINT: Future log output will appear in directory "log".
Here is the content of one such file:
2020-03-29 16:20:37.625 UTC [20000] LOG: database system was shut down at 2020-03-28 16:35:33 UTC
2020-03-29 16:20:37.732 UTC [19998] LOG: database system is ready to accept connections
2020-03-29 16:23:03.023 UTC [19998] LOG: received fast shutdown request
2020-03-29 16:23:03.101 UTC [19998] LOG: aborting any active transactions
2020-03-29 16:23:03.102 UTC [19998] LOG: background worker "logical replication launcher" (PID 20006) exited with exit code 1
2020-03-29 16:23:03.102 UTC [20001] LOG: shutting down
2020-03-29 16:23:03.214 UTC [19998] LOG: database system is shut down
For me, the shutdown times are random, usually about an hour, sometimes a few minutes and then one time for maybe 5-6 hours.
About logging being configured correctly- this is what i get in postgres:
UJAH=# show log_min_error_statement ;
log_min_error_statement
-------------------------
debug5
(1 row)
Time: 143.378 ms
Thank you for working with me, I tried this and see a lot of statements. I’m waiting for the server to crash (since the last 30 minutes, and cynically wondering if it ever will!)
EDIT: There has been about 6 more hours of uptime (in the 24h that have passed) and the server still hasn’t crashed.
EDIT2: Almost 7 hours of uptime (in the 1.5d that have passed) and no crash!
EDIT3: Almost 15 hours of uptime (in the 2.5d that have passed) and no crash…
EDIT4: Almost 24 hours of uptime (in the 3d that have passed) and no crash…