close

1. 先設 hosts  (兩台都做)

192.168.0.68    linux-ad1.tw3.ad        linux-ad1
192.168.0.69    linux-ad2.tw3.ad        linux-ad2

2.  aptitude install drbd8-utils drbd-utils ocfs2-tools ocfs2console 

3.  /etc/drbd.d/global_common.conf  (兩台都做)

# DRBD is the result of over a decade of development by LINBIT.
# In case you need professional services for DRBD or have
# feature requests visit http://www.linbit.com

global {
        usage-count yes;
}
common {
  startup {
                wfc-timeout 0;
                degr-wfc-timeout 60;
                become-primary-on both;
                }

 net {
    protocol C;
  }
}

4.  /etc/drbd.d/dr0.res  (兩台都做)
resource r0 {
net {
                allow-two-primaries;
                after-sb-0pri discard-zero-changes;
                after-sb-1pri discard-secondary;
                after-sb-2pri disconnect;
        }
 startup { become-primary-on both; }
 device /dev/drbd1;
 disk /dev/sdb;
 meta-disk internal;
 on linux-ad1 {
 address 192.168.0.68:7789;
 }
 on linux-ad2 {
 address 192.168.0.69:7789;
 }
}

5.  drbdadm create-md r0

6.   drbdadm -- --overwrite-data-of-peer primary all (設 這台為 primary )

7.   /etc/init.d/drbd status (看一下狀態)  (兩台都做)

8.    /etc/ocfs2/cluster.conf  (兩台都做)

node:
        ip_port = 7777
        ip_address = 192.168.0.68
        number = 1
        name = linux-ad1
        cluster = ocfs2
node:
        ip_port = 7777
        ip_address = 192.168.0.69
        number = 2
        name = linux-ad2
        cluster = ocfs2
cluster:
        node_count = 2
        name = ocfs2
9.   dpkg-reconfigure ocfs2-tools (重建 ocfs2 ...讓 config load 進去)  (兩台都做)

10 .   mkfs.ocfs2 --force -L "ocfs2" /dev/sdb (兩台都做)

11.  mkdir /cluster  (兩台都做)

12.  /etc/rc.local  (兩台都做)

sleep 5
mount -t ocfs2 /dev/drbd1 /cluster

13.  reboot  (兩台都做 , 一台一台做)

14.  systemctl status rc.local (看看是否有 mount 上去 )  (兩台都做 , 不成功 就 debug ......)

rc-local.service - /etc/rc.local Compatibility
   Loaded: loaded (/lib/systemd/system/rc-local.service; static; vendor preset: enabled)
  Drop-In: /lib/systemd/system/rc-local.service.d
           └─debian.conf
   Active: active (exited) since Sat 2016-11-12 00:42:39 CST; 2 days ago
  Process: 1335 ExecStart=/etc/rc.local start (code=exited, status=0/SUCCESS)
    Tasks: 0
   Memory: 0B
      CPU: 0

Nov 12 00:42:25 linux-ad1 systemd[1]: Starting /etc/rc.local Compatibility...
Nov 12 00:42:39 linux-ad1 systemd[1]: Started /etc/rc.local Compatibility.
 

...........................................................................................................

後記 : 測老半天 ....哈快 4 小時 ....不知為何 在步驟 10 網路上都說  mkfs.ocfs2 -L "ocfs2" /dev/drbd1

每次 rc.local 就是會發生 找不到  drbd1 ... 也就 mount 不了 /cluster ....

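最後死馬當活馬醫 .....   mkfs.ocfs2 --force -L "ocfs2" /dev/sdb 好了 ...也不知為何 ?

(註 : 推測原因是 rc.local 執行時 drbd 資源還沒 up 起來或還沒成為 primary , 所以找不到 /dev/drbd1 ; 一般做法是等 DRBD 兩邊都 Primary/UpToDate 後 , 只在其中一台對 /dev/drbd1 做 mkfs.ocfs2 。直接對底層的 /dev/sdb 格式化會繞過 DRBD , 也可能蓋到 internal meta-data , 請自行斟酌 。)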

...........................................................................................................

debug 過程中發生了 

drbd-overview 

   Secondary/Secondary 
 
用   drbdadm primary --force r0   強制把這台設為 primary

-------------------------------------------------------

debug 過程中發生了 

drbd-overview 

1:r0/0  Connected Primary/Primary UpToDate/Diskless  /cluster ocfs2 50G 2.1G 48G 5%

在 Diskless 那台做 

drbdadm disconnect r0

drbdadm create-md r0  

drbdadm connect r0

就看到 Diskless 那台重新同步

----------------------------------------------------------

root@linux-ad1:~# drbd-overview
  1:r0/0  WFConnection Secondary/Unknown Inconsistent/DUnknown C r----s
root@linux-ad1:~# modprobe drbd
root@linux-ad1:~# drbdadm up r0
Device '1' is configured!
Command 'drbdmeta 1 v08 /dev/sdb internal apply-al' terminated with exit code 20
root@linux-ad1:~# drbdadm primary --force r0
root@linux-ad1:~# drbd-overview
  1:r0/0  WFConnection Primary/Unknown UpToDate/DUnknown C r----s
root@linux-ad1:~#
---------------------------------------------------------------------------------------------------------------

其他我沒遇到的問題可參考 :  http://wangzhijian.blog.51cto.com/6427016/1711284

问题1:
设置primary node时失败,提示需要访问UpToDate data:
# drbdadm primary r1      
r1: State change failed: (-2) Need access to UpToDate data
Command 'drbdsetup primary r1' terminated with exit code 17
解决:
# drbdadm primary --force r1          ##强制设置
# drbd-overview 
 1:r1/0  Connected(1*) Primary(1*) UpToDate(1*)

问题2:
创建设备元数据时提示操作失败:
# drbdadm -c /etc/drbd.conf create-md all
md_offset 314568704
al_offset 314535936
bm_offset 314523648
 
Found ext3 filesystem
      307200 kB data area apparently used
      307152 kB left usable by current configuration
       
Device size would be truncated, which
would corrupt data and result in
'access beyond end of device' errors.
You need to either
   * use external meta data (recommended)
   * shrink that filesystem first
   * zero out the device (destroy the filesystem)
Operation refused.
 
Command 'drbdmeta 1 v09 /dev/sdb1 internal create-md 1' terminated with exit code 40
解决:
# dd if=/dev/zero of=/dev/sdb1 bs=1M count=100

问题3:
启用资源时提示失败,无法创建transport:
# drbdadm up r1
r1: Failure: (172) Failed to create transport (drbd_transport_xxx module missing?)
Command 'drbdsetup new-peer r1 1 --_name=node2 --shared-secret=123456 --cram-hmac-alg=sha1 --protocol=C' terminated with exit code 10
drbdadm: new-path r1: skipped due to earlier error
解决:
安装drbd_transport_tcp模块
# cp drbd_transport_tcp.ko /lib/modules/3.10.0-229.el7.x86_64/kernel/lib/
# depmod  
# modprobe drbd_transport_tcp
# lsmod|grep drbd_transport_tcp
drbd_transport_tcp     17731  0 
drbd                  463948  2 drbd_transport_tcp

问题4:
启用资源时提示失败,配置要求无效:
# drbdadm up r1
r1: Failure: (162) Invalid configuration request
additional info from kernel:
minor exists as different volume
Command 'drbdsetup new-minor r1 1 1' terminated with exit code 10
解决:
安装drbd模块

# cp drbd.ko /lib/modules/3.10.0-229.el7.x86_64/kernel/lib/
# depmod
# modprobe drbd
# lsmod |grep drbd
drbd                  463948  0 
libcrc32c              12644  2 xfs,drbd

问题5:
启动资源时提示失败,设备或资源忙
# drbdadm up r1
No valid meta data found
Command 'drbdmeta 1 v09 /dev/sdb1 internal apply-al' terminated with exit code 255
# drbdadm create-md all
open(/dev/sdb1) failed: Device or resource busy
Exclusive open failed. Do it anyways?
[need to type 'yes' to confirm] yes
# Output might be stale, since minor 1 is attached
Device '1' is configured!
Command 'drbdmeta 1 v09 /dev/sdb1 internal create-md 1' terminated with exit code 20
解决:
fdisk /dev/sdb 删除该分区保存,然后再重新新建分区并保存即可

问题6:
提示unknown resource
# drbdadm primary r1
r1: Failure: (158) Unknown resource
additional info from kernel:
unknown resource
Command 'drbdsetup primary r1' terminated with exit code 10
解决:
# /etc/init.d/drbd start

问题7及解决:
# drbd-overview 
 1:r1/1  Connec/Connec Second/Unknow UpToDa/DUnkno

# systemctl stop firewalld

# drbd-overview 
 1:r1/1  Connec/StaAlo Second/Unknow Outdat/DUnkno

# setenforce 0
 
问题8:
node2无法连接node1,显示node1为StandAlone

[root@node1 ~]# drbd-overview 
 1:mysqldata/1  Connec/Connec Second/Unknow UpToDa/DUnkno

[root@node2 ~]# drbd-overview 
 1:mysqldata/1  Connec/StaAlo Second/Unknow UpToDa/DUnkno
解决:
[root@node2 ~]# drbdadm --discard-my-data connect all
[root@node2 ~]# drbd-overview 
 1:mysqldata/1  Connec/Connec Second/Unknow UpToDa/DUnkno 
[root@node2 ~]# drbd-overview                    ##从上一状态转为该状态需要些时间
 1:mysqldata/1  Connected(2*) Secondary(2*) UpToDa/UpToDa

问题9及解决:
node1和node2互相认为对方StandAlone
[root@node1 ~]# drbd-overview 
 1:mysqldata/1  Connec/StaAlo Second/Unknow UpToDa/DUnkno 
[root@node1 ~]# drbdadm --discard-my-data connect all
[root@node1 ~]# drbd-overview 
 1:mysqldata/1  Connec/Connec Second/Unknow UpToDa/DUnkno
[root@node2 ~]# drbd-overview 
 1:mysqldata/1  Connec/StaAlo Second/Unknow UpToDa/DUnkno
[root@node2 ~]# drbdadm connect all
[root@node2 ~]# drbd-overview 
 1:mysqldata/1  Connected(2*) Secondary(2*) UpToDa/UpToDa

arrow
arrow
    創作者介紹
    創作者 echochio 的頭像
    echochio

    echochio

    echochio 發表在 痞客邦 留言(0) 人氣()