rsciw |
03-16-2012 06:21 PM |
Trouble with pacemaker, drbd and nfs
Ahoi,
trying for a while to get this stuff up and running but somehow I appear to be missing stuff...
pacemaker will not, for the life of it, start drbd, and therefore NFS will fail too...
perhaps I just need another set of eyes having a look at this, as mine are falling asleep (11:15 pm and still in office ;) )
this is what I get from
# crm verify -L -V
Code:
crm_verify[11776]: 2012/03/16_23:17:12 WARN: unpack_rsc_op: Processing failed op fs_drbd_start_0 on wkse13p1xynfs01: unknown error (1)
crm_verify[11776]: 2012/03/16_23:17:12 WARN: unpack_rsc_op: Processing failed op nfs_share_start_0 on wkse13p1xynfs01: unknown error (1)
crm_verify[11776]: 2012/03/16_23:17:12 WARN: common_apply_stickiness: Forcing fs_drbd away from wkse13p1xynfs02 after 1000000 failures (max=1000000)
crm_verify[11776]: 2012/03/16_23:17:12 WARN: common_apply_stickiness: Forcing fs_drbd away from wkse13p1xynfs01 after 1000000 failures (max=1000000)
crm_verify[11776]: 2012/03/16_23:17:12 WARN: common_apply_stickiness: Forcing nfs_share away from wkse13p1xynfs01 after 1000000 failures (max=1000000)
crm_verify[11776]: 2012/03/16_23:17:12 ERROR: clone_rsc_order_lh_non_clone: Unknown task: fs_drbd_promote_0
crm_verify[11776]: 2012/03/16_23:17:12 ERROR: clone_rsc_order_rh_non_clone: Unknown action: fs_drbd_demote_0
the pacemaker conf:
Code:
node $id="b10a7cb5-2d73-4bf6-a8ce-301bf0a61d62" wkse13p1xynfs01
node $id="c594c1ea-d70d-464a-921e-b0aba4f455a6" wkse13p1xynfs02
primitive clusterIP ocf:heartbeat:IPaddr2 \
params ip="10.26.29.237" nic="eth0:2" \
op monitor interval="5s"
primitive clusterIParp ocf:heartbeat:SendArp \
params ip="10.26.29.237" nic="eth0:2"
primitive drbd_disk ocf:linbit:drbd \
params drbd_resource="nfs" \
op monitor interval="15s" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="240"
primitive fs_drbd ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/drbd" fstype="ext3" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="240" \
meta target-role="Started"
primitive nfs_share ocf:heartbeat:nfsserver \
params nfs_ip="10.26.29.237" nfs_init_script="/etc/init.d/nfs" nfs_shared_infodir="/var/lib/nfs" nfs_notify_cmd="/sbin/rpc.statd" \
op start interval="0" timeout="240" \
op stop interval="0" timeout="240"
group IP clusterIP clusterIParp \
meta target-role="Started"
group Misc nfs_share \
meta target-role="Started"
ms ms_drbd drbd_disk \
meta master-max="1" master-node-max="1" clone-max="1" clone-node-max="1" notify="true"
colocation drbd-with-IP inf: ms_drbd:Master IP
colocation mnt_on_master inf: fs_drbd ms_drbd:Master
colocation nfs_on_master inf: Misc ms_drbd:Master
order ip-before-arp inf: clusterIP:start clusterIParp:start
order ip-before-drbd inf: clusterIP:start ms_drbd:promote
order mount-after-drbd inf: ms_drbd:start fs_drbd:promote
property $id="cib-bootstrap-options" \
dc-version="1.0.12-unknown" \
cluster-infrastructure="Heartbeat" \
expected-quorum-votes="1" \
stonith-enabled="false" \
no-quorum-policy="ignore"
drbd conf
Code:
/etc/drbd.conf
#
# please have a a look at the example configuration file in
# /usr/share/doc/drbd83/drbd.conf
#
#global {
# usage-count no;
#}
#common {
# protocol C;
#}
include "/etc/drbd.d/global_common.conf";
include "/etc/drbd.d/*.res";
/etc/drbd.d/global_common.conf
global {
usage-count no;
# minor-count dialog-refresh disable-ip-verification
}
common {
protocol C;
handlers {
pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
# fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
# split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
# before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
# after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
}
startup { degr-wfc-timeout 120; }
disk {
# on-io-error fencing use-bmbv no-disk-barrier no-disk-flushes
# no-disk-drain no-md-flushes max-bio-bvecs
}
net {
# sndbuf-size rcvbuf-size timeout connect-int ping-int ping-timeout max-buffers
# max-epoch-size ko-count allow-two-primaries cram-hmac-alg shared-secret
# after-sb-0pri after-sb-1pri after-sb-2pri data-integrity-alg no-tcp-cork
}
syncer {
# rate after al-extents use-rle cpu-mask verify-alg csums-alg
}
}
/etc/drbd.d/nfs.res
resource nfs {
handlers {
split-brain "/usr/lib/drbd/notify-split-brain.sh root";
}
device /dev/drbd0;
disk /dev/sdb;
meta-disk internal;
syncer {
rate 100M;
}
on wkse13p1xynfs01 {
address 10.26.29.238:7790;
}
on wkse13p1xynfs02 {
address 10.26.29.239:7790;
}
net {
cram-hmac-alg sha1;
shared-secret "thisis4lulz53cr37b3ingv!s!bl34nd411";
after-sb-0pri discard-least-changes;
after-sb-1pri consensus;
after-sb-2pri disconnect;
}
disk {
on-io-error detach;
}
}
package versions:
Code:
resource-agents-1.0.4-1.1.el5
heartbeat-libs-3.0.3-2.el5
heartbeat-3.0.3-2.el5
drbd83-8.3.12-2.el5.centos
cluster-glue-libs-1.0.6-1.6.el5
cluster-glue-1.0.6-1.6.el5
pacemaker-libs-1.0.12-1.el5.centos
pacemaker-1.0.12-1.el5.centos
kmod-drbd83-8.3.12-1.el5.centos
so yeah, if anyone sees anything which I'm too blind to see right now, I would appreciate it if you'd post it here :)
Cheers
|