ceph 添加/移除osd 设备

1 删除osd设备

[root@ceph04 ~]# service ceph stop osd.3 //停止该设备
=== osd.3 ===
Stopping ceph osd.3 on ceph04...kill 12012...done
[root@ceph04 ~]# ceph osd crush remove osd.3 //从CRUSH中移除
removed item id 3 name 'osd.3' from crush map
[root@ceph04 ~]# ceph osd tree

# id    weight  type name       up/down reweight
-1      3       root default
-3      3               rack unknownrack
-2      1                       host ceph01
0       1                               osd.0   up      1
-4      1                       host ceph02
1       1                               osd.1   up      1
-5      1                       host ceph03
2       1                               osd.2   up      1
-6      0                       host ceph04

3       0       osd.3   down      1

[root@ceph04 ~]# ceph auth del osd.3 //从认证中删除
updated

[root@ceph04 ~]# ceph osd rm 3 //删除
removed osd.3

2 新增osd 设备

[root@ceph04 data]# mkfs.xfs -f /dev/sdb //格式化分区
[root@ceph04 data]# mount   /dev/sdb /data/osd.3/ //挂载分区
[root@ceph04 data]# ceph-osd -i 3 --mkfs --mkkey //对指定的目录mkcephfs
2014-02-26 11:17:48.014785 7f94ef4947a0 -1 journal FileJournal::_open: disabling aio for non-block journal.  Use journal_force_aio to force use of aio anyway
2014-02-26 11:17:48.049559 7f94ef4947a0 -1 journal FileJournal::_open: disabling aio for non-block journal.  Use journal_force_aio to force use of aio anyway
2014-02-26 11:17:48.059596 7f94ef4947a0 -1 filestore(/data/osd.3) could not find 23c2fcde/osd_superblock/0//-1 in index: (2) No such file or directory
2014-02-26 11:17:48.150783 7f94ef4947a0 -1 created object store /data/osd.3 journal /data/osd.3/journal for osd.3 fsid c9871314-3f0b-42c5-8bc7-ad14d41977a0
2014-02-26 11:17:48.150840 7f94ef4947a0 -1 auth: error reading file: /data/osd.3/keyring: can't open /data/osd.3/keyring: (2) No such file or directory
2014-02-26 11:17:48.150949 7f94ef4947a0 -1 created new key in keyring /data/osd.3/keyring

[root@ceph04 osd.3]# ceph auth add osd.3 osd 'allow *' mon 'allow rwx' -i /data/osd.3/keyring //添加allow rwx 规则
2014-02-26 11:19:26.004404 7f46b7ee7760 -1 read 56 bytes from /data/osd.3/keyring
added key for osd.3


[root@ceph01 ceph]# ceph osd getcrushmap -o map //获得crushmap信息
got crush map from osdmap epoch 12
[root@ceph01 ceph]# ls
fetch_config  map  ceph.conf  ceph.keyring
[root@ceph01 ceph]# crushtool -d map  //格式化输出crushmap信息
# begin crush map

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2

# types
type 0 osd
type 1 host
type 2 rack
type 3 row
type 4 room
type 5 datacenter
type 6 root

# buckets
host ceph01 {
        id -2           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.0 weight 1.000
}
host ceph02 {
        id -4           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.1 weight 1.000
}
host ceph03 {
        id -5           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.2 weight 1.000
}
host ceph04 {
        id -6           # do not change unnecessarily
        # weight 0.000
        alg straw
        hash 0  # rjenkins1
}
rack unknownrack {
        id -3           # do not change unnecessarily
        # weight 3.000
        alg straw
        hash 0  # rjenkins1
        item ceph01 weight 1.000
        item ceph02 weight 1.000
        item ceph03 weight 1.000
        item ceph04 weight 0.000
}
root default {
        id -1           # do not change unnecessarily
        # weight 3.000
        alg straw
        hash 0  # rjenkins1
        item unknownrack weight 3.000
}

# rules
rule data {
        ruleset 0
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
rule metadata {
        ruleset 1
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
rule rbd {
        ruleset 2
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
[root@ceph04 osd.3]# ceph osd crush set 3 1.0 root=default rack=unknownrack host=ceph04 //设置crushmap
set item id 3 name 'osd.3' weight 1 at location {host=ceph04,rack=unknownrack,root=default} to crush map



host ceph04 {
        id -6           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.3 weight 1.000
}
rack unknownrack {
        id -3           # do not change unnecessarily
        # weight 4.000
        alg straw
        hash 0  # rjenkins1
        item ceph01 weight 1.000
        item ceph02 weight 1.000
        item ceph03 weight 1.000
        item ceph04 weight 1.000
}
************************
ceph osd crush set 3 1.0 root=default rack=unknownrack host=ceph04的另一种做法

1.修改map.txt
vi map.txt
#devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
  
host osd3 {
        id -6           # 注意:id 不能与已有 bucket 冲突(-1 已被 root default 占用)
        alg straw
        hash 0
        item osd.3 weight 1.00
}

2.编译crushmap
    crushtool -c /root/map.txt -o map
3.将编译好的map再次导入
    ceph osd setcrushmap -i map
*******************************

[root@ceph04 osd.3]# service ceph start osd.3 //启动osd.3
=== osd.3 ===
Mounting xfs on ceph04:/data/osd.3
create-or-move updated item id 3 name 'osd.3' weight 0.02 at location {host=ceph04,root=default} to crush map
Starting Ceph osd.3 on ceph04...
starting osd.3 at :/0 osd_data /data/osd.3 /data/osd.3/journal

[root@ceph04 osd.3]# ceph -s
   health HEALTH_WARN 1 pgs recovery_wait; 1 pgs stuck unclean; recovery 1/42 degraded (2.381%);  recovering 4 o/s, 3553B/s; clock skew detected on mon.ceph02, mon.ceph03
   monmap e1: 3 mons at {ceph01=192.168.9.62:6789/0,ceph02=192.168.9.63:6789/0,ceph03=192.168.9.73:6789/0}, election epoch 6, quorum 0,1,2 ceph01,ceph02,ceph03
   osdmap e16: 4 osds: 4 up, 4 in
    pgmap v459: 960 pgs: 14 active, 945 active+clean, 1 active+recovery_wait; 9518 bytes data, 4154 MB used, 77725 MB / 81880 MB avail; 1001B/s wr, 0op/s; 1/42 degraded (2.381%);  recovering 4 o/s, 3553B/s
   mdsmap e5: 1/1/1 up {0=ucms01=up:active}, 1 up:standby

[root@ceph04 osd.3]# ceph osd tree

# id    weight  type name       up/down reweight
-1      4       root default
-3      4               rack unknownrack
-2      1                       host ceph01
0       1                               osd.0   up      1
-4      1                       host ceph02
1       1                               osd.1   up      1
-5      1                       host ceph03
2       1                               osd.2   up      1
-6      1                       host ceph04
3       1                               osd.3   up      1

可能遇到的问题:

ceph mds stat
mdsmap e63: 1/1/1 up {0=ceph02=up:replay}, 1 up:standby

osd日志信息为:

2014-02-26 10:42:15.386552 7f33b5e40700  0 -- 192.168.9.63:6802/13005 >> 192.168.9.62:6803/18894 pipe(0x3fa1900 sd=29 :6802 s=0 pgs=0 cs=0 l=0).accept connect_seq 0 vs existing 0 state connecting
2014-02-26 10:42:16.394540 7f33b5639700  0 -- 192.168.9.63:6802/13005 >> 192.168.9.39:6802/11369 pipe(0x3fa1680 sd=33 :6802 s=0 pgs=0 cs=0 l=0).accept connect_seq 0 vs existing 0 state wait
2014-02-26 10:42:17.029623 7f33b5033700  0 -- 192.168.9.63:6801/13005 >> 192.168.9.62:0/3872604662 pipe(0x3fa4d80 sd=35 :6801 s=0 pgs=0 cs=0 l=0).accept peer addr is really 192.168.9.62:0/3872604662 (socket is 192.168.9.62:39504/0)

mds 日志信息为:

2014-02-26 10:42:12.824284 7f57ba9de700  0 -- 192.168.9.63:6800/12877 >> 192.168.9.62:6801/17332 pipe(0x2450000 sd=17 :37551 s=1 pgs=0 cs=0 l=1).connect claims to be 0.0.0.0:6801/18894 not 192.168.9.62:6801/17332 - wrong node!

错误的原因可能为:osd 没有在crushmap中。

发表评论

您的电子邮箱地址不会被公开。