After a split-brain on a 2-node cluster (Corosync + Pacemaker + DRBD), a strange thing happens:
When I migrate the resource from one node to the other, the DRBD resource on the Slave disconnects and its disk detaches.
So I have to manually reconnect the DRBD resource and re-attach the disk.
CRM:
---------------------------------------------------------------------
node nodo1 \
attributes standby="off"
node nodo2
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.1.112" nic="eth0" cidr_netmask="32" mac="EA:33:3E:EE:9B:E8" \
operations $id="ClusterIP-operations" \
op monitor interval="10" timeout="20" start-delay="0" \
meta target-role="started"
primitive res_Filesystem_1 ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/condivisa" fstype="ext4" \
operations $id="res_Filesystem_1-operations" \
op start interval="0" timeout="60" \
op stop interval="0" timeout="60" \
op monitor interval="20" timeout="40" start-delay="0" \
op notify interval="0" timeout="60" \
meta target-role="started" allow-migrate="true"
primitive res_drbd_1 ocf:linbit:drbd \
params drbd_resource="zimbra" \
operations $id="res_drbd_1-operations" \
op start interval="0" timeout="240" \
op promote interval="0" timeout="90" \
op demote interval="0" timeout="90" \
op stop interval="0" timeout="100" \
op monitor interval="10" timeout="20" start-delay="0" \
op notify interval="0" timeout="90" \
meta allow-migrate="true"
ms ms_drbd_1 res_drbd_1 \
meta clone-max="2" notify="true" interleave="true" allow-migrate="true"
location cli-standby-ClusterIP ClusterIP \
rule $id="cli-standby-rule-ClusterIP" -inf: #uname eq nodo1
colocation col_ms_drbd_1_ClusterIP inf: ms_drbd_1 ClusterIP
colocation col_res_Filesystem_1_ClusterIP inf: res_Filesystem_1 ClusterIP
colocation col_res_Filesystem_1_ms_drbd_1 inf: res_Filesystem_1 ms_drbd_1:Master
order ord_ClusterIP_ms_drbd_1 inf: ClusterIP ms_drbd_1
order ord_ClusterIP_res_Filesystem_1 inf: ClusterIP res_Filesystem_1
order ord_ms_drbd_1_res_Filesystem_1 inf: ms_drbd_1:promote res_Filesystem_1:start
property $id="cib-bootstrap-options" \
dc-version="1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore" \
last-lrm-refresh="1345734393"
rsc_defaults $id="rsc-options" \
resource-stickiness="1000"
---------------------------------------------------------------------
DRBD:
[root@nodo1 ~]# cat /etc/drbd.d/global_common.conf
## generated by drbd-gui
global {
usage-count yes;
}
common {
startup {
degr-wfc-timeout 0;
}
net {
cram-hmac-alg sha1;
shared-secret hRKjl5CuM7MflXohYfLwt1nCJxessj4u;
}
disk {
on-io-error detach;
}
}
---------------------------------------------------------------------
This is the problem after migration:
cat /proc/drbd
version: 8.3.13 (api:88/proto:86-96)
GIT-hash: 83ca112086600faacab2f157bc5a9324f7bd7f77 build by dag@Build64R6, 2012-05-07 11:52:13
0: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C r-----
ns:0 nr:8 dw:12 dr:825 al:0 bm:1 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:4
-------------------------------------------------------------------
LOG:
Aug 23 18:12:31 nodo2 lrmd: [1094]: info: cancel_op: operation monitor[85] on ocf::Filesystem::res_Filesystem_1 for client 1097, its parameters: fstype=[ext4] device=[/dev/drbd0] crm_feature_set=[3.0.6] directory=[/condivisa] CRM_meta_name=[monitor] CRM_meta_start_delay=[0] CRM_meta_timeout=[40000] CRM_meta_interval=[20000] cancelled
Aug 23 18:12:31 nodo2 Filesystem(res_Filesystem_1)[1902]: INFO: Running stop for /dev/drbd0 on /condivisa
Aug 23 18:12:32 nodo2 kernel: block drbd0: role( Primary -> Secondary )
Aug 23 18:12:32 nodo2 kernel: block drbd0: bitmap WRITE of 0 pages took 0 jiffies
Aug 23 18:12:32 nodo2 kernel: block drbd0: 0 KB (0 bits) marked out-of-sync by on disk bit-map.
Aug 23 18:12:32 nodo2 kernel: block drbd0: peer( Secondary -> Unknown ) conn( Connected -> Disconnecting ) pdsk( UpToDate -> DUnknown )
Aug 23 18:12:32 nodo2 kernel: block drbd0: asender terminated
Aug 23 18:12:32 nodo2 kernel: block drbd0: Terminating asender thread
Aug 23 18:12:32 nodo2 kernel: block drbd0: Connection closed
Aug 23 18:12:32 nodo2 kernel: block drbd0: conn( Disconnecting -> StandAlone )
Aug 23 18:12:32 nodo2 kernel: block drbd0: receiver terminated
Aug 23 18:12:32 nodo2 kernel: block drbd0: Terminating receiver thread
Aug 23 18:12:32 nodo2 kernel: block drbd0: disk( UpToDate -> Failed )
Aug 23 18:12:32 nodo2 kernel: block drbd0: disk( Failed -> Diskless )
Aug 23 18:12:32 nodo2 kernel: block drbd0: drbd_bm_resize called with capacity == 0
Aug 23 18:12:32 nodo2 kernel: block drbd0: worker terminated
Aug 23 18:12:32 nodo2 kernel: block drbd0: Terminating worker thread
-------------------------------------------------------------
Thanks
-- Yena --