I have a 3-node DRBD cluster managed by Pacemaker/Corosync. When testing the configuration, I promote two nodes to primary so I can migrate VMs with clustered LVM.
Then I reboot the 3rd node to simulate a failure.
When that server rejoins the Pacemaker cluster, the DRBD resource comes up in an Inconsistent state, and both primaries reject the state change. How can I allow the rebooted node to come back as secondary and resync while the other two nodes remain primary? I've tried several config tweaks; the latest resource definition is below.
I was on DRBD 9.2.12 and am now trying 9.3.0-rc1 for testing.
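For reference, the test sequence is roughly the following (a minimal sketch: Pacemaker drives the promotions in the real cluster, these are just the equivalent drbdadm steps plus a status check):

# on sphf2node01 and sphf2node02: promote to primary (normally done by Pacemaker)
drbdadm primary drbd_vol01

# on sphf2node03: simulate the failure
reboot

# after sphf2node03 rejoins, inspect the resource from any node
drbdadm status drbd_vol01
drbdsetup status drbd_vol01 --verbose --statistics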
resource drbd_vol01 {
options {
auto-promote no;
# on-no-quorum suspend-io;
on-suspended-primary-outdated force-secondary;
# quorum 1;   # or: quorum majority;
# quorum-minimum-redundancy 1;
}
disk {
al-extents 3833;
al-updates no;
c-plan-ahead 30; # Increased to 30
c-max-rate 700M; # Increased to 700 MB/s
c-min-rate 215M;
c-fill-target 60M; # Increased to 60 MB
c-delay-target 20;
resync-rate 215M;
disk-barrier no;
disk-flushes no;
rs-discard-granularity 1048576;
}
net {
max-buffers 50k;
sndbuf-size 4M; # Increased to 4 MB
rcvbuf-size 4M; # Increased to 4 MB
max-epoch-size 20000; # Increased to 20000
timeout 300;
ko-count 0;
connect-int 30;
ping-int 30;
ping-timeout 100;
allow-two-primaries yes;
after-sb-0pri discard-zero-changes;
after-sb-1pri discard-secondary;
after-sb-2pri disconnect;
always-asbp yes;
fencing resource-only;
protocol C;
rr-conflict call-pri-lost;
csums-alg crc32c;
verify-alg crc32c;
csums-after-crash-only yes;
}
volume 0 {
device /dev/drbd0;
disk /dev/zvol/zfs_datapool01/cluster/drbd_vol01;
meta-disk internal;
}
on sphf2node01 {
address 10.98.64.255:7789;
node-id 1;
}
on sphf2node02 {
address 10.98.64.254:7789;
node-id 2;
}
on sphf2node03 {
address 10.98.64.253:7789;
node-id 3;
}
connection-mesh {
hosts sphf2node01 sphf2node02 sphf2node03;
}
}
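For completeness, the Pacemaker side is roughly equivalent to the sketch below (pcs syntax assumed; the resource id drbd_vol01_res is illustrative, older Pacemaker releases use the Master/Slave role names instead of Promoted/Unpromoted, and master-max/master-node-max instead of promoted-max/promoted-node-max):

pcs resource create drbd_vol01_res ocf:linbit:drbd drbd_resource=drbd_vol01 \
    op monitor interval=29s role=Promoted \
    op monitor interval=31s role=Unpromoted

# promotable clone; promoted-max=2 is what allows two primaries at once
pcs resource promotable drbd_vol01_res \
    promoted-max=2 promoted-node-max=1 clone-max=3 clone-node-max=1 notify=true

The relevant part for this question is promoted-max=2: Pacemaker keeps two instances promoted, and the rebooted third node is expected to come back unpromoted (secondary) and resync.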