Hi Devin,
Thanks a lot for your previous reply; it was very helpful.
We’ve now successfully increased the resync speed from 250 KB/s to around 3 MB/s. This was achieved by tweaking the following parameters:
--c-plan-ahead=20
--resync-rate=3M
--c-min-rate=3M
--c-max-rate=6M
From what we’ve observed, both --resync-rate and --c-min-rate seem to define the guaranteed minimum sync speed. However, even with --c-max-rate set to 6M, the actual resync speed remains fixed at around 3 MB/s—it doesn’t scale up to use the full allowed range.
At this point, the issue is technically resolved and our production environment is stable, with many critical services running on this DRBD setup. However, your suggestion regarding --max-buffers caught our attention.
Would increasing --max-buffers potentially allow the sync controller to adjust speed more dynamically between the configured min and max rates (e.g. somewhere between 3M and 6M, instead of sticking to the minimum)?
Currently, our configuration uses --max-buffers=2048, and we’re considering increasing it to 8000.
Are there any other parameters we should consider tuning in conjunction with --max-buffers to further improve resync performance?
And most importantly: could increasing --max-buffers introduce any risk of service disruption, disk I/O issues, or instability on the primary node?
Here’s a snapshot of our current configuration for reference:
resource drbd0 {
options {
cpu-mask ""; # default
on-no-data-accessible io-error; # default
auto-promote yes; # default
peer-ack-window 4096s; # bytes, default
peer-ack-delay 100; # milliseconds, default
twopc-timeout 300; # 1/10 seconds, default
twopc-retry-timeout 1; # 1/10 seconds, default
auto-promote-timeout 20; # 1/10 seconds, default
max-io-depth 8000; # default
quorum off; # default
on-no-quorum suspend-io; # default
quorum-minimum-redundancy off; # default
}
_this_host {
node-id 1;
volume 0 {
device minor 0;
disk "/dev/sdb1";
meta-disk internal;
disk {
size 0s; # bytes, default
on-io-error detach; # default
disk-barrier no; # default
disk-flushes yes; # default
disk-drain yes; # default
md-flushes yes; # default
resync-after -1; # default
al-extents 1237; # default
al-updates yes; # default
discard-zeroes-if-aligned yes; # default
disable-write-same no; # default
disk-timeout 0; # 1/10 seconds, default
read-balancing prefer-local; # default
rs-discard-granularity 0; # bytes, default
}
}
}
connection {
_peer_node_id 0;
path {
_this_host ipv4 192.168.1.9:7788;
_remote_host ipv4 192.168.1.10:7788;
}
net {
transport ""; # default
protocol C; # default
timeout 60; # 1/10 seconds, default
max-epoch-size 2048; # default
connect-int 10; # seconds, default
ping-int 10; # seconds, default
sndbuf-size 0; # bytes, default
rcvbuf-size 0; # bytes, default
ko-count 7; # default
allow-two-primaries no; # default
cram-hmac-alg ""; # default
shared-secret ""; # default
after-sb-0pri disconnect; # default
after-sb-1pri disconnect; # default
after-sb-2pri disconnect; # default
always-asbp no; # default
rr-conflict disconnect; # default
ping-timeout 5; # 1/10 seconds, default
data-integrity-alg ""; # default
tcp-cork yes; # default
on-congestion block; # default
congestion-fill 0s; # bytes, default
congestion-extents 1237; # default
csums-alg ""; # default
csums-after-crash-only no; # default
verify-alg ""; # default
use-rle yes; # default
socket-check-timeout 0; # default
fencing dont-care; # default
max-buffers 2048; # default
allow-remote-read yes; # default
_name "nodo2";
}
volume 0 {
disk {
resync-rate 3072k; # bytes/second
c-plan-ahead 20; # 1/10 seconds, default
c-delay-target 10; # 1/10 seconds, default
c-fill-target 100s; # bytes, default
c-max-rate 6144k; # bytes/second
c-min-rate 3072k; # bytes/second
bitmap yes; # default
}
}
}
}
Thanks again for your help.