1 Removing an OSD device
[root@ceph04 ~]# service ceph stop osd.3        //stop the OSD daemon
=== osd.3 ===
Stopping ceph osd.3 on ceph04...kill 12012...done
[root@ceph04 ~]# ceph osd crush remove osd.3    //remove it from the CRUSH map
removed item id 3 name 'osd.3' from crush map
[root@ceph04 ~]# ceph osd tree
# id    weight  type name       up/down reweight
-1      3       root default
-3      3               rack unknownrack
-2      1                       host ceph01
0       1                               osd.0   up      1
-4      1                       host ceph02
1       1                               osd.1   up      1
-5      1                       host ceph03
2       1                               osd.2   up      1
-6      0                       host ceph04
3       0                               osd.3   down    1
[root@ceph04 ~]# ceph auth del osd.3            //delete its authentication key
updated
[root@ceph04 ~]# ceph osd rm 3                  //remove it from the OSD map
removed osd.3
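For repeated use, the same removal steps can be collected into a small script. A minimal sketch, assuming it is run on the node hosting the OSD and that the OSD id is passed as an argument (the remove-osd.sh name and the OSD_ID variable are illustrative, not part of the original transcript):

#!/bin/bash
# remove-osd.sh (hypothetical helper): tear down one OSD by id
# Usage: ./remove-osd.sh 3
set -e

OSD_ID=$1

service ceph stop osd.${OSD_ID}        # stop the daemon on this host
ceph osd crush remove osd.${OSD_ID}    # remove it from the CRUSH map
ceph auth del osd.${OSD_ID}            # delete its authentication key
ceph osd rm ${OSD_ID}                  # remove it from the OSD map
ceph osd tree                          # verify that the OSD is gone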
2 Adding an OSD device
[root@ceph04 data]# mkfs.xfs -f /dev/sdb                //format the data partition
[root@ceph04 data]# mount /dev/sdb /data/osd.3/         //mount it as the OSD data directory
[root@ceph04 data]# ceph-osd -i 3 --mkfs --mkkey        //initialize the OSD filesystem in that directory and create a key
2014-02-26 11:17:48.014785 7f94ef4947a0 -1 journal FileJournal::_open: disabling aio for non-block journal.  Use journal_force_aio to force use of aio anyway
2014-02-26 11:17:48.049559 7f94ef4947a0 -1 journal FileJournal::_open: disabling aio for non-block journal.  Use journal_force_aio to force use of aio anyway
2014-02-26 11:17:48.059596 7f94ef4947a0 -1 filestore(/data/osd.3) could not find 23c2fcde/osd_superblock/0//-1 in index: (2) No such file or directory
2014-02-26 11:17:48.150783 7f94ef4947a0 -1 created object store /data/osd.3 journal /data/osd.3/journal for osd.3 fsid c9871314-3f0b-42c5-8bc7-ad14d41977a0
2014-02-26 11:17:48.150840 7f94ef4947a0 -1 auth: error reading file: /data/osd.3/keyring: can't open /data/osd.3/keyring: (2) No such file or directory
2014-02-26 11:17:48.150949 7f94ef4947a0 -1 created new key in keyring /data/osd.3/keyring
[root@ceph04 osd.3]# ceph auth add osd.3 osd 'allow *' mon 'allow rwx' -i /data/osd.3/keyring   //register the key with the allow * / allow rwx caps
2014-02-26 11:19:26.004404 7f46b7ee7760 -1 read 56 bytes from /data/osd.3/keyring
added key for osd.3
[root@ceph01 ceph]# ceph osd getcrushmap -o map         //fetch the current crushmap
got crush map from osdmap epoch 12
[root@ceph01 ceph]# ls
fetch_config  map  ceph.conf  ceph.keyring
[root@ceph01 ceph]# crushtool -d map                    //decompile the crushmap into readable text
# begin crush map

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2

# types
type 0 osd
type 1 host
type 2 rack
type 3 row
type 4 room
type 5 datacenter
type 6 root

# buckets
host ceph01 {
        id -2           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.0 weight 1.000
}
host ceph02 {
        id -4           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.1 weight 1.000
}
host ceph03 {
        id -5           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.2 weight 1.000
}
host ceph04 {
        id -6           # do not change unnecessarily
        # weight 0.000
        alg straw
        hash 0  # rjenkins1
}
rack unknownrack {
        id -3           # do not change unnecessarily
        # weight 3.000
        alg straw
        hash 0  # rjenkins1
        item ceph01 weight 1.000
        item ceph02 weight 1.000
        item ceph03 weight 1.000
        item ceph04 weight 0.000
}
root default {
        id -1           # do not change unnecessarily
        # weight 3.000
        alg straw
        hash 0  # rjenkins1
        item unknownrack weight 3.000
}

# rules
rule data {
        ruleset 0
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
rule metadata {
        ruleset 1
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
rule rbd {
        ruleset 2
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}

[root@ceph04 osd.3]# ceph osd crush set 3 1.0 root=default rack=unknownrack host=ceph04        //add osd.3 to the crushmap
set item id 3 name 'osd.3' weight 1 at location {host=ceph04,rack=unknownrack,root=default} to crush map

The affected buckets now look like this:

host ceph04 {
        id -6           # do not change unnecessarily
        # weight 1.000
        alg straw
        hash 0  # rjenkins1
        item osd.3 weight 1.000
}
rack unknownrack {
        id -3           # do not change unnecessarily
        # weight 4.000
        alg straw
        hash 0  # rjenkins1
        item ceph01 weight 1.000
        item ceph02 weight 1.000
        item ceph03 weight 1.000
        item ceph04 weight 1.000
}

************************
An alternative to "ceph osd crush set 3 1.0 root=default rack=unknownrack host=ceph04":

1. Edit the decompiled map (map.txt), adding the new device and a host bucket for it:
vi map.txt
#devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
host osd3 {
        id -1
        alg straw
        hash 0
        item osd.3 weight 1.00
}

2. Compile the crushmap:
crushtool -c /root/map.txt -o map

3. Import the compiled map back into the cluster:
ceph osd setcrushmap -i map
*******************************

[root@ceph04 osd.3]# service ceph start osd.3           //start osd.3
=== osd.3 ===
Mounting xfs on ceph04:/data/osd.3
create-or-move updated item id 3 name 'osd.3' weight 0.02 at location {host=ceph04,root=default} to crush map
Starting Ceph osd.3 on ceph04...
starting osd.3 at :/0 osd_data /data/osd.3 /data/osd.3/journal
[root@ceph04 osd.3]# ceph -s
   health HEALTH_WARN 1 pgs recovery_wait; 1 pgs stuck unclean; recovery 1/42 degraded (2.381%); recovering 4 o/s, 3553B/s; clock skew detected on mon.ceph02, mon.ceph03
   monmap e1: 3 mons at {ceph01=192.168.9.62:6789/0,ceph02=192.168.9.63:6789/0,ceph03=192.168.9.73:6789/0}, election epoch 6, quorum 0,1,2 ceph01,ceph02,ceph03
   osdmap e16: 4 osds: 4 up, 4 in
   pgmap v459: 960 pgs: 14 active, 945 active+clean, 1 active+recovery_wait; 9518 bytes data, 4154 MB used, 77725 MB / 81880 MB avail; 1001B/s wr, 0op/s; 1/42 degraded (2.381%); recovering 4 o/s, 3553B/s
   mdsmap e5: 1/1/1 up {0=ucms01=up:active}, 1 up:standby
[root@ceph04 osd.3]# ceph osd tree
# id    weight  type name       up/down reweight
-1      4       root default
-3      4               rack unknownrack
-2      1                       host ceph01
0       1                               osd.0   up      1
-4      1                       host ceph02
1       1                               osd.1   up      1
-5      1                       host ceph03
2       1                               osd.2   up      1
-6      1                       host ceph04
3       1                               osd.3   up      1
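The whole addition procedure can likewise be scripted. A minimal sketch that follows the transcript above, assuming the same /data/osd.<id> layout and the default/unknownrack CRUSH location; the add-osd.sh name, the variables, and the weight of 1.0 are illustrative and would need to match your own cluster:

#!/bin/bash
# add-osd.sh (hypothetical helper): prepare and bring up one OSD
# Usage: ./add-osd.sh 3 /dev/sdb
set -e

OSD_ID=$1
DEV=$2
DATA_DIR=/data/osd.${OSD_ID}

mkfs.xfs -f ${DEV}                        # format the data partition
mkdir -p ${DATA_DIR}
mount ${DEV} ${DATA_DIR}                  # mount it as the OSD data directory

ceph-osd -i ${OSD_ID} --mkfs --mkkey      # initialize the data directory and create a keyring
ceph auth add osd.${OSD_ID} osd 'allow *' mon 'allow rwx' -i ${DATA_DIR}/keyring

# place the OSD in the CRUSH hierarchy (host ceph04 in the transcript), then start it
ceph osd crush set ${OSD_ID} 1.0 root=default rack=unknownrack host=$(hostname -s)
service ceph start osd.${OSD_ID}
ceph osd tree                             # the new OSD should show up as "up" with its weight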
Problems you may run into:
ceph mds stat
mdsmap e63: 1/1/1 up {0=ceph02=up:replay}, 1 up:standby
The OSD log shows:
2014-02-26 10:42:15.386552 7f33b5e40700  0 -- 192.168.9.63:6802/13005 >> 192.168.9.62:6803/18894 pipe(0x3fa1900 sd=29 :6802 s=0 pgs=0 cs=0 l=0).accept connect_seq 0 vs existing 0 state connecting
2014-02-26 10:42:16.394540 7f33b5639700  0 -- 192.168.9.63:6802/13005 >> 192.168.9.39:6802/11369 pipe(0x3fa1680 sd=33 :6802 s=0 pgs=0 cs=0 l=0).accept connect_seq 0 vs existing 0 state wait
2014-02-26 10:42:17.029623 7f33b5033700  0 -- 192.168.9.63:6801/13005 >> 192.168.9.62:0/3872604662 pipe(0x3fa4d80 sd=35 :6801 s=0 pgs=0 cs=0 l=0).accept peer addr is really 192.168.9.62:0/3872604662 (socket is 192.168.9.62:39504/0)
The MDS log shows:
2014-02-26 10:42:12.824284 7f57ba9de700 0 -- 192.168.9.63:6800/12877 >> 192.168.9.62:6801/17332 pipe(0x2450000 sd=17 :37551 s=1 pgs=0 cs=0 l=1).connect claims to be 0.0.0.0:6801/18894 not 192.168.9.62:6801/17332 - wrong node! |
A likely cause of these errors is that the OSD has not been added to the CRUSH map.
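To confirm the diagnosis, check whether the OSD appears in the CRUSH hierarchy and whether its registered address matches the node it actually runs on. A quick sketch of the checks, using the osd.3 from this example (the grep pattern is illustrative):

ceph osd tree                   # osd.3 should be listed under its host bucket
ceph osd dump | grep '^osd'     # registered OSD addresses, useful when "wrong node!" messages appear
ceph osd crush set 3 1.0 root=default rack=unknownrack host=ceph04   # re-add it to the CRUSH map if missing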