LinuxQuestions.org

LinuxQuestions.org (/questions/)
-   Linux - Server (https://www.linuxquestions.org/questions/linux-server-73/)
-   -   Trouble with pacemaker, drbd and nfs (https://www.linuxquestions.org/questions/linux-server-73/trouble-with-pacemaker-drbd-and-nfs-934858/)

rsciw 03-16-2012 06:21 PM

Trouble with pacemaker, drbd and nfs
 
Ahoi,

I've been trying for a while to get this stuff up and running, but somehow I appear to be missing something...

Pacemaker will not, for the life of it, start DRBD, and therefore NFS fails too...

Perhaps I just need another set of eyes to have a look at this, as mine are falling asleep (11:15 pm and still in the office ;) )

This is what I get from
# crm_verify -L -V
Code:


crm_verify[11776]: 2012/03/16_23:17:12 WARN: unpack_rsc_op: Processing failed op fs_drbd_start_0 on wkse13p1xynfs01: unknown error (1)
crm_verify[11776]: 2012/03/16_23:17:12 WARN: unpack_rsc_op: Processing failed op nfs_share_start_0 on wkse13p1xynfs01: unknown error (1)
crm_verify[11776]: 2012/03/16_23:17:12 WARN: common_apply_stickiness: Forcing fs_drbd away from wkse13p1xynfs02 after 1000000 failures (max=1000000)
crm_verify[11776]: 2012/03/16_23:17:12 WARN: common_apply_stickiness: Forcing fs_drbd away from wkse13p1xynfs01 after 1000000 failures (max=1000000)
crm_verify[11776]: 2012/03/16_23:17:12 WARN: common_apply_stickiness: Forcing nfs_share away from wkse13p1xynfs01 after 1000000 failures (max=1000000)
crm_verify[11776]: 2012/03/16_23:17:12 ERROR: clone_rsc_order_lh_non_clone: Unknown task: fs_drbd_promote_0
crm_verify[11776]: 2012/03/16_23:17:12 ERROR: clone_rsc_order_rh_non_clone: Unknown action: fs_drbd_demote_0

the pacemaker conf:
Code:

node $id="b10a7cb5-2d73-4bf6-a8ce-301bf0a61d62" wkse13p1xynfs01
node $id="c594c1ea-d70d-464a-921e-b0aba4f455a6" wkse13p1xynfs02
primitive clusterIP ocf:heartbeat:IPaddr2 \
        params ip="10.26.29.237" nic="eth0:2" \
        op monitor interval="5s"
primitive clusterIParp ocf:heartbeat:SendArp \
        params ip="10.26.29.237" nic="eth0:2"
primitive drbd_disk ocf:linbit:drbd \
        params drbd_resource="nfs" \
        op monitor interval="15s" \
        op start interval="0" timeout="240" \
        op stop interval="0" timeout="240"
primitive fs_drbd ocf:heartbeat:Filesystem \
        params device="/dev/drbd0" directory="/drbd" fstype="ext3" \
        op start interval="0" timeout="240" \
        op stop interval="0" timeout="240" \
        meta target-role="Started"
primitive nfs_share ocf:heartbeat:nfsserver \
        params nfs_ip="10.26.29.237" nfs_init_script="/etc/init.d/nfs" nfs_shared_infodir="/var/lib/nfs" nfs_notify_cmd="/sbin/rpc.statd" \
        op start interval="0" timeout="240" \
        op stop interval="0" timeout="240"
group IP clusterIP clusterIParp \
        meta target-role="Started"
group Misc nfs_share \
        meta target-role="Started"
ms ms_drbd drbd_disk \
        meta master-max="1" master-node-max="1" clone-max="1" clone-node-max="1" notify="true"
colocation drbd-with-IP inf: ms_drbd:Master IP
colocation mnt_on_master inf: fs_drbd ms_drbd:Master
colocation nfs_on_master inf: Misc ms_drbd:Master
order ip-before-arp inf: clusterIP:start clusterIParp:start
order ip-before-drbd inf: clusterIP:start ms_drbd:promote
order mount-after-drbd inf: ms_drbd:start fs_drbd:promote
property $id="cib-bootstrap-options" \
        dc-version="1.0.12-unknown" \
        cluster-infrastructure="Heartbeat" \
        expected-quorum-votes="1" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"

drbd conf
Code:

/etc/drbd.conf

#
# please have a look at the example configuration file in
# /usr/share/doc/drbd83/drbd.conf
#
#global {
#      usage-count no;
#}
#common {
#      protocol C;
#}

include "/etc/drbd.d/global_common.conf";
include "/etc/drbd.d/*.res";

/etc/drbd.d/global_common.conf


global {
        usage-count no;
        # minor-count dialog-refresh disable-ip-verification
}

common {
        protocol C;

        handlers {
                pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
                pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
                local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
                # fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
                # split-brain "/usr/lib/drbd/notify-split-brain.sh root";
                # out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
                # before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
                # after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
        }

        startup { degr-wfc-timeout 120; }

        disk {
                # on-io-error fencing use-bmbv no-disk-barrier no-disk-flushes
                # no-disk-drain no-md-flushes max-bio-bvecs
        }

        net {
                # sndbuf-size rcvbuf-size timeout connect-int ping-int ping-timeout max-buffers
                # max-epoch-size ko-count allow-two-primaries cram-hmac-alg shared-secret
                # after-sb-0pri after-sb-1pri after-sb-2pri data-integrity-alg no-tcp-cork
        }

        syncer {
                # rate after al-extents use-rle cpu-mask verify-alg csums-alg
        }
}

/etc/drbd.d/nfs.res

resource nfs {
        handlers {
                split-brain "/usr/lib/drbd/notify-split-brain.sh root";
        }
        device /dev/drbd0;
        disk /dev/sdb;
        meta-disk internal;
        syncer {
                rate 100M;
        }
        on wkse13p1xynfs01 {
                address 10.26.29.238:7790;
        }
        on wkse13p1xynfs02 {
                address 10.26.29.239:7790;
        }
        net {
                cram-hmac-alg sha1;
                shared-secret "thisis4lulz53cr37b3ingv!s!bl34nd411";
                after-sb-0pri discard-least-changes;
                after-sb-1pri consensus;
                after-sb-2pri disconnect;
        }
        disk {
                on-io-error detach;
        }

package versions:

Code:

resource-agents-1.0.4-1.1.el5
heartbeat-libs-3.0.3-2.el5
heartbeat-3.0.3-2.el5
drbd83-8.3.12-2.el5.centos
cluster-glue-libs-1.0.6-1.6.el5
cluster-glue-1.0.6-1.6.el5
pacemaker-libs-1.0.12-1.el5.centos
pacemaker-1.0.12-1.el5.centos
kmod-drbd83-8.3.12-1.el5.centos

So yeah, if anyone sees anything which I'm too blind to see right now, I would appreciate it if you'd post it here :)

Cheers


All times are GMT -5. The time now is 10:21 PM.