1. 先設 hosts (兩台都做)
192.168.0.68 linux-ad1.tw3.ad linux-ad1
192.168.0.69 linux-ad2.tw3.ad linux-ad2
2. aptitude install drbd8-utils drbd-utils ocfs2-tools ocfs2console
3. /etc/drbd.d/global_common.conf (兩台都做)
# DRBD is the result of over a decade of development by LINBIT.
# In case you need professional services for DRBD or have
# feature requests visit http://www.linbit.com
global {
usage-count yes;
}
common {
startup {
wfc-timeout 0;
degr-wfc-timeout 60;
become-primary-on both;
}
net {
protocol C;
}
}
4. /etc/drbd.d/dr0.res (兩台都做)
resource r0 {
net {
allow-two-primaries;
after-sb-0pri discard-zero-changes;
after-sb-1pri discard-secondary;
after-sb-2pri disconnect;
}
startup { become-primary-on both; }
device /dev/drbd1;
disk /dev/sdb;
meta-disk internal;
on linux-ad1 {
address 192.168.0.68:7789;
}
on linux-ad2 {
address 192.168.0.69:7789;
}
}
5. drbdadm create-md r0 (兩台都做)
6. drbdadm -- --overwrite-data-of-peer primary all (只在其中一台做 , 設這台為 primary)
7. /etc/init.d/drbd status (看一下狀態) (兩台都做)
8. /etc/ocfs2/cluster.conf (兩台都做)
node:
ip_port = 7777
ip_address = 192.168.0.68
number = 1
name = linux-ad1
cluster = ocfs2
node:
ip_port = 7777
ip_address = 192.168.0.69
number = 2
name = linux-ad2
cluster = ocfs2
cluster:
node_count = 2
name = ocfs2
9. dpkg-reconfigure ocfs2-tools (重建 ocfs2 ...讓 config load 進去) (兩台都做)
10. mkfs.ocfs2 --force -L "ocfs2" /dev/sdb (兩台都做)
11. mkdir /cluster (兩台都做)
12. /etc/rc.local (兩台都做)
sleep 5
mount -t ocfs2 /dev/drbd1 /cluster
13. reboot (兩台都做 , 一台一台做)
14. systemctl status rc.local (看看是否有 mount 上去 ) (兩台都做 , 不成功 就 debug ......)
rc-local.service - /etc/rc.local Compatibility
Loaded: loaded (/lib/systemd/system/rc-local.service; static; vendor preset: enabled)
Drop-In: /lib/systemd/system/rc-local.service.d
└─debian.conf
Active: active (exited) since Sat 2016-11-12 00:42:39 CST; 2 days ago
Process: 1335 ExecStart=/etc/rc.local start (code=exited, status=0/SUCCESS)
Tasks: 0
Memory: 0B
CPU: 0
Nov 12 00:42:25 linux-ad1 systemd[1]: Starting /etc/rc.local Compatibility...
Nov 12 00:42:39 linux-ad1 systemd[1]: Started /etc/rc.local Compatibility.
...........................................................................................................
後記 : 測老半天 ....哈快 4 小時 ....不知為何 在步驟 10 網路上都說 mkfs.ocfs2 -L "ocfs2" /dev/drbd1
每次 rc.local 就是會發生 找不到 drbd1 ... 也就 mount 不了 /cluster ....
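最後死馬當活馬醫 ..... mkfs.ocfs2 --force -L "ocfs2" /dev/sdb 好了 ...也不知為何 ?
(註 : 直接格式化 DRBD 的底層磁碟 /dev/sdb 會繞過 DRBD , 寫入的資料不會經由 DRBD 同步 , 而且 internal meta-data 就放在 /dev/sdb 的尾端 , 有被檔案系統蓋掉的風險 ; 一般做法是只在 primary 那台對 /dev/drbd1 做 mkfs.ocfs2 。 rc.local 找不到 drbd1 可能是當時 drbd 還沒啟動完成 , 待確認)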
...........................................................................................................
debug 過程中發生了
drbd-overview
Secondary/Secondary
-------------------------------------------------------
debug 過程中發生了
drbd-overview
1:r0/0 Connected Primary/Primary UpToDate/Diskless
/cluster ocfs2 50G 2.1G 48G 5%
在 Diskless 那台做
drbdadm disconnect r0
drbdadm create-md r0
drbdadm connect r0
就看到 Diskless 那台重新同步
----------------------------------------------------------
root@linux-ad1:~# drbd-overview
1:r0/0 WFConnection Secondary/Unknown Inconsistent/DUnknown C r----s
root@linux-ad1:~# modprobe drbd
root@linux-ad1:~# drbdadm up r0
Device '1' is configured!
Command 'drbdmeta 1 v08 /dev/sdb internal apply-al' terminated with exit code 20
root@linux-ad1:~# drbdadm primary --force r0
root@linux-ad1:~# drbd-overview
1:r0/0 WFConnection Primary/Unknown UpToDate/DUnknown C r----s
root@linux-ad1:~#
---------------------------------------------------------------------------------------------------------------
其他我沒遇到的問題可參考 : http://wangzhijian.blog.51cto.com/6427016/1711284
问题1:
设置primary node时失败,提示需要访问UpToDate data:
# drbdadm primary r1
r1: State change failed: (-2) Need access to UpToDate data
Command 'drbdsetup primary r1' terminated with exit code 17
解决:
# drbdadm primary --force r1 ##强制设置
# drbd-overview
1:r1/0 Connected(1*) Primary(1*) UpToDate(1*)
问题2:
创建设备元数据时提示操作失败:
# drbdadm -c /etc/drbd.conf create-md all
md_offset 314568704
al_offset 314535936
bm_offset 314523648
Found ext3 filesystem
307200 kB data area apparently used
307152 kB left usable by current configuration
Device size would be truncated, which
would corrupt data and result in
'access beyond end of device' errors.
You need to either
* use external meta data (recommended)
* shrink that filesystem first
* zero out the device (destroy the filesystem)
Operation refused.
Command 'drbdmeta 1 v09 /dev/sdb1 internal create-md 1' terminated with exit code 40
解决:
# dd if=/dev/zero of=/dev/sdb1 bs=1M count=100
问题3:
启用资源时提示失败,无法创建transport:
# drbdadm up r1
r1: Failure: (172) Failed to create transport (drbd_transport_xxx module missing?)
Command 'drbdsetup new-peer r1 1 --_name=node2 --shared-secret=123456 --cram-hmac-alg=sha1 --protocol=C' terminated with exit code 10
drbdadm: new-path r1: skipped due to earlier error
解决:
安装drbd_transport_tcp模块
# cp drbd_transport_tcp.ko /lib/modules/3.10.0-229.el7.x86_64/kernel/lib/
# depmod
# modprobe drbd_transport_tcp
# lsmod|grep drbd_transport_tcp
drbd_transport_tcp 17731 0
drbd 463948 2 drbd_transport_tcp
问题4:
启用资源时提示失败,配置要求无效:
# drbdadm up r1
r1: Failure: (162) Invalid configuration request
additional info from kernel:
minor exists as different volume
Command 'drbdsetup new-minor r1 1 1' terminated with exit code 10
解决:
安装drbd模块
# cp drbd.ko /lib/modules/3.10.0-229.el7.x86_64/kernel/lib/
# depmod
# modprobe drbd
# lsmod |grep drbd
drbd 463948 0
libcrc32c 12644 2 xfs,drbd
问题5:
启动资源时提示失败,设备或资源忙
# drbdadm up r1
No valid meta data found
Command 'drbdmeta 1 v09 /dev/sdb1 internal apply-al' terminated with exit code 255
# drbdadm create-md all
open(/dev/sdb1) failed: Device or resource busy
Exclusive open failed. Do it anyways?
[need to type 'yes' to confirm] yes
# Output might be stale, since minor 1 is attached
Device '1' is configured!
Command 'drbdmeta 1 v09 /dev/sdb1 internal create-md 1' terminated with exit code 20
解决:
fdisk /dev/sdb 删除该分区保存,然后再重新新建分区并保存即可
问题6:
提示unknown resource
# drbdadm primary r1
r1: Failure: (158) Unknown resource
additional info from kernel:
unknown resource
Command 'drbdsetup primary r1' terminated with exit code 10
解决:
# /etc/init.d/drbd start
问题7及解决:
# drbd-overview
1:r1/1 Connec/Connec Second/Unknow UpToDa/DUnkno
# systemctl stop firewalld
# drbd-overview
1:r1/1 Connec/StaAlo Second/Unknow Outdat/DUnkno
# setenforce 0
问题8:
node2无法连接node1,显示node1为StandAlone
[root@node1 ~]# drbd-overview
1:mysqldata/1 Connec/Connec Second/Unknow UpToDa/DUnkno
[root@node2 ~]# drbd-overview
1:mysqldata/1 Connec/StaAlo Second/Unknow UpToDa/DUnkno
解决:
[root@node2 ~]# drbdadm --discard-my-data connect all
[root@node2 ~]# drbd-overview
1:mysqldata/1 Connec/Connec Second/Unknow UpToDa/DUnkno
[root@node2 ~]# drbd-overview ##从上一状态转为该状态需要些时间
1:mysqldata/1 Connected(2*) Secondary(2*) UpToDa/UpToDa
问题9及解决:
node1和node2互相认为对方StandAlone
[root@node1 ~]# drbd-overview
1:mysqldata/1 Connec/StaAlo Second/Unknow UpToDa/DUnkno
[root@node1 ~]# drbdadm --discard-my-data connect all
[root@node1 ~]# drbd-overview
1:mysqldata/1 Connec/Connec Second/Unknow UpToDa/DUnkno
[root@node2 ~]# drbd-overview
1:mysqldata/1 Connec/StaAlo Second/Unknow UpToDa/DUnkno
[root@node2 ~]# drbdadm connect all
[root@node2 ~]# drbd-overview
1:mysqldata/1 Connected(2*) Secondary(2*) UpToDa/UpToDa
留言列表