Hello!
We recently ran tests with DRBD version 9.2.13 in a Kubernetes environment, during which we observed that all replicas of one of the resources became Outdated. For your reference, the kernel logs and the resource dump are attached.
Test Overview
- Pod and PVC Creation:
  - A large number of pods are created in a loop by deploying the corresponding PersistentVolumeClaims (PVCs) and Deployments.
  - Initially, the pods are scheduled on nodes that host the DRBD data replicas.
- Pod Relocation:
  - The script then moves the pods to nodes that do not host local DRBD replicas.
  - This relocation is performed by checking the DRBD status and patching the Deployments to force the pods onto nodes without local replicas (see the relocation sketch after this list).
- Chaos Monkey (DRBD Connection Disruption):
  The script repeatedly executes a chaos monkey routine that simulates network disruptions affecting the DRBD connections (a simplified sketch of the break/restore routine also follows after this list). For each iteration:
  - Node Iteration: The script iterates over the nodes in random order.
  - Traffic Drop: On each node, it identifies the DRBD resource ports used by the test-created PVCs and applies iptables rules to drop traffic on both the INPUT and OUTPUT chains.
  - Random Break Duration: The connection disruption lasts for a random duration between 15 and 45 seconds.
  - Traffic Restoration: After the break, the iptables rules are removed to restore traffic.
  - Random Pause: The script then pauses for a random duration between 5 and 15 seconds before processing the next node.
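To make the relocation step concrete, here is a minimal sketch of one way to do it; the deployment name, the output parsing, and the use of spec.nodeName (rather than, say, node affinity) are assumptions for illustration, not the exact script:

RES="pvc-742a2fd2-a6de-4450-a600-dac7693c758d"   # DRBD resource backing the PVC
DEPLOY="load-test-0"                             # hypothetical deployment name

# Nodes that currently hold a replica of the resource (parsing is approximate,
# adjust it to your linstor client's table format).
replica_nodes=$(linstor resource list -r "$RES" | awk -v r="$RES" '$0 ~ r {print $4}')

# Pick any node that does not hold a replica.
target=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' \
         | grep -vxF -f <(printf '%s\n' "$replica_nodes") | head -n1)

# Pin the pod to that node by patching the deployment's pod template.
kubectl patch deployment "$DEPLOY" --type merge \
  -p "{\"spec\":{\"template\":{\"spec\":{\"nodeName\":\"$target\"}}}}"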
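The break/restore routine on each node boils down to something like this (a simplified sketch: a single hard-coded port is shown, while the real script derives the port list from the test-created PVCs):

PORT=7002   # DRBD port of the resource under test

# Drop DRBD replication traffic in both directions.
iptables -I INPUT  -p tcp --dport "$PORT" -j DROP
iptables -I OUTPUT -p tcp --dport "$PORT" -j DROP

# Keep the connections broken for a random 15-45 seconds.
sleep $(( RANDOM % 31 + 15 ))

# Restore traffic by removing the rules again.
iptables -D INPUT  -p tcp --dport "$PORT" -j DROP
iptables -D OUTPUT -p tcp --dport "$PORT" -j DROP

# Random pause of 5-15 seconds before moving on to the next node.
sleep $(( RANDOM % 11 + 5 ))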
During the tests, all replicas of one of the resources switched to an outdated state.
How to recover these resources remains unclear. I attempted to recover them by running drbdadm disconnect and drbdadm connect, as well as by disabling quorum with:
drbdsetup resource-options $resource_name --quorum off
However, this did not help, because on the node with the diskless replica the process hangs in the D state, making it impossible to recover the resource without rebooting the node.
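For reference, this is roughly how the hung task can be inspected on the diskless node (plain procps and /proc, nothing DRBD-specific; the PID is a placeholder):

# Processes stuck in uninterruptible sleep (state D) and what they are waiting on.
ps -eo pid,stat,wchan:32,comm | awk 'NR == 1 || $2 ~ /^D/'

# Kernel stack of one of the hung tasks (needs root).
pid=12345   # hypothetical PID taken from the list above
cat /proc/"$pid"/stack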
I would appreciate any insights or suggestions on how to recover such resources effectively. Specifically: has anyone encountered a similar issue with a diskless replica hanging in the D state and found a workaround that avoids rebooting the node? And is it possible to prevent the replicas from becoming Outdated in the first place?
Kernel logs:
Resource dump:
drbdadm dump pvc-742a2fd2-a6de-4450-a600-dac7693c758d
# resource pvc-742a2fd2-a6de-4450-a600-dac7693c758d on storage-load-test-0: not ignored, not stacked
# defined at /var/lib/linstor.d/pvc-742a2fd2-a6de-4450-a600-dac7693c758d.res:6
resource pvc-742a2fd2-a6de-4450-a600-dac7693c758d {
    on storage-load-test-0 {
        node-id 3;
        volume 0 {
            disk {
                discard-zeroes-if-aligned no;
                rs-discard-granularity 4096;
            }
            device minor 1002;
            disk none;
            meta-disk internal;
        }
    }
    on storage-load-test-1 {
        node-id 1;
        volume 0 {
            disk {
                discard-zeroes-if-aligned no;
                rs-discard-granularity 4096;
            }
            device minor 1002;
            disk /dev/drbd/this/is/not/used;
            meta-disk internal;
        }
    }
    on storage-load-test-2 {
        node-id 0;
        volume 0 {
            disk {
                discard-zeroes-if-aligned no;
                rs-discard-granularity 4096;
            }
            device minor 1002;
            disk /dev/drbd/this/is/not/used;
            meta-disk internal;
        }
    }
    on storage-load-test-3 {
        node-id 2;
        volume 0 {
            disk {
                discard-zeroes-if-aligned no;
                rs-discard-granularity 4096;
            }
            device minor 1002;
            disk /dev/drbd/this/is/not/used;
            meta-disk internal;
        }
    }
    connection {
        host storage-load-test-0 address ipv4 172.17.1.2:7002;
        host storage-load-test-1 address ipv4 172.17.1.3:7002;
        net {
            _name storage-load-test-1;
        }
    }
    connection {
        host storage-load-test-0 address ipv4 172.17.1.2:7002;
        host storage-load-test-2 address ipv4 172.17.1.4:7002;
        net {
            _name storage-load-test-2;
        }
    }
    connection {
        host storage-load-test-0 address ipv4 172.17.1.2:7002;
        host storage-load-test-3 address ipv4 172.17.1.5:7002;
        net {
            _name storage-load-test-3;
        }
    }
    options {
        on-no-data-accessible suspend-io;
        on-no-quorum suspend-io;
        on-suspended-primary-outdated force-secondary;
        quorum majority;
        quorum-minimum-redundancy 2;
    }
    net {
        cram-hmac-alg sha1;
        shared-secret yDRvjadO/pjhwazpAHUF;
        protocol C;
        rr-conflict retry-connect;
        verify-alg crct10dif-pclmul;
    }
}
Resource status:
linstor r l -r pvc-742a2fd2-a6de-4450-a600-dac7693c758d
╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
┊ ResourceName ┊ Node ┊ Port ┊ Usage ┊ Conns ┊ State ┊ CreatedOn ┊
╞═════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╡
┊ pvc-742a2fd2-a6de-4450-a600-dac7693c758d ┊ storage-load-test-0 ┊ 7002 ┊ InUse ┊ Ok ┊ Diskless ┊ 2025-03-30 22:07:58 ┊
┊ pvc-742a2fd2-a6de-4450-a600-dac7693c758d ┊ storage-load-test-1 ┊ 7002 ┊ Unused ┊ Ok ┊ Outdated ┊ 2025-03-30 22:05:02 ┊
┊ pvc-742a2fd2-a6de-4450-a600-dac7693c758d ┊ storage-load-test-2 ┊ 7002 ┊ Unused ┊ Ok ┊ Outdated ┊ 2025-03-30 22:04:59 ┊
┊ pvc-742a2fd2-a6de-4450-a600-dac7693c758d ┊ storage-load-test-3 ┊ 7002 ┊ Unused ┊ Ok ┊ Outdated ┊ 2025-03-30 22:05:02 ┊
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
drbdadm status pvc-742a2fd2-a6de-4450-a600-dac7693c758d
pvc-742a2fd2-a6de-4450-a600-dac7693c758d role:Primary suspended:no-data,quorum
disk:Diskless quorum:no open:yes blocked:upper
storage-load-test-1 role:Secondary
peer-disk:Outdated
storage-load-test-2 connection:StandAlone
storage-load-test-3 role:Secondary
peer-disk:Outdated
drbdadm disconnect pvc-742a2fd2-a6de-4450-a600-dac7693c758d
drbdadm status pvc-742a2fd2-a6de-4450-a600-dac7693c758d
pvc-742a2fd2-a6de-4450-a600-dac7693c758d role:Primary suspended:no-data,quorum
disk:Diskless quorum:no open:yes blocked:upper
storage-load-test-1 connection:StandAlone
storage-load-test-2 connection:StandAlone
storage-load-test-3 connection:StandAlone
drbdadm connect pvc-742a2fd2-a6de-4450-a600-dac7693c758d
[root@storage-load-test-0 /]# drbdadm status pvc-742a2fd2-a6de-4450-a600-dac7693c758d
pvc-742a2fd2-a6de-4450-a600-dac7693c758d role:Primary suspended:no-data,quorum
disk:Diskless quorum:no open:yes blocked:upper
storage-load-test-1 role:Secondary
peer-disk:Outdated
storage-load-test-2 role:Secondary
peer-disk:Outdated
storage-load-test-3 role:Secondary
peer-disk:Outdated