1.1 移出故障osd


[root@ceph2 ceph]# df -hT

Filesystem Type Size Used Avail Use% Mounted on /dev/vda1 xfs 40G 2.7G 38G 7% / devtmpfs devtmpfs 1.9G 0 1.9G 0% /dev tmpfs tmpfs 1.9G 0 1.9G 0% /dev/shm tmpfs tmpfs 1.9G 201M 1.7G 11% /run tmpfs tmpfs 1.9G 0 1.9G 0% /sys/fs/cgroup /dev/vdb1 xfs 15G 213M 15G 2% /var/lib/ceph/osd/ceph-0 /dev/vdc1 xfs 15G 228M 15G 2% /var/lib/ceph/osd/ceph-3 /dev/vdd1 xfs 15G 152M 15G 1% /var/lib/ceph/osd/ceph-6 tmpfs tmpfs 380M 0 380M 0% /run/user/0


[root@ceph2 ceph]# systemctl stop ceph-osd@0


[root@ceph2 ceph]# ceph -s

cluster: id: 35a91e48-8244-4e96-a7ee-980ab989d20d health: HEALTH_WARN 1 osds down Degraded data redundancy: 67/663 objects degraded (10.106%), 170 pgs unclean, 170 pgs degraded services: mon: 3 daemons, quorum ceph2,ceph3,ceph4 mgr: ceph4(active), standbys: ceph3, ceph2 mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby osd: 9 osds: 8 up, 9 in rbd-mirror: 1 daemon active data: pools: 13 pools, 504 pgs objects: 221 objects, 241 MB usage: 1770 MB used, 133 GB / 134 GB avail pgs: 67/663 objects degraded (10.106%) 334 active clean 170 active undersized degraded

[root@ceph2 ceph]# ceph osd tree

ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -9 0.13499 root dc1 -10 0.04500 rack rack1 -3 0.04500 host ceph2 0 hdd 0.01500 osd.0 down 1.00000 1.00000 3 hdd 0.01500 osd.3 up 1.00000 1.00000 6 hdd 0.01500 osd.6 up 1.00000 1.00000 -11 0.04500 rack rack2 -7 0.04500 host ceph3 2 hdd 0.01500 osd.2 up 1.00000 1.00000 5 hdd 0.01500 osd.5 up 1.00000 1.00000 8 hdd 0.01500 osd.8 up 1.00000 1.00000 -12 0.04500 rack rack3 -5 0.04500 host ceph4 1 hdd 0.01500 osd.1 up 1.00000 1.00000 4 hdd 0.01500 osd.4 up 1.00000 0.50000 7 hdd 0.01500 osd.7 up 1.00000 1.00000 -1 0.13499 root default -3 0.04500 host ceph2 0 hdd 0.01500 osd.0 down 1.00000 1.00000 3 hdd 0.01500 osd.3 up 1.00000 1.00000 6 hdd 0.01500 osd.6 up 1.00000 1.00000 -7 0.04500 host ceph3 2 hdd 0.01500 osd.2 up 1.00000 1.00000 5 hdd 0.01500 osd.5 up 1.00000 1.00000 8 hdd 0.01500 osd.8 up 1.00000 1.00000 -5 0.04500 host ceph4 1 hdd 0.01500 osd.1 up 1.00000 1.00000 4 hdd 0.01500 osd.4 up 1.00000 0.50000 7 hdd 0.01500 osd.7 up 1.00000 1.00000

[root@ceph2 ceph]# ceph osd out osd.0

marked out osd.0.

[root@ceph2 ceph]# ceph -s

cluster: id: 35a91e48-8244-4e96-a7ee-980ab989d20d health: HEALTH_WARN Degraded data redundancy: 126/663 objects degraded (19.005%), 24 pgs unclean, 24 pgs degraded services: mon: 3 daemons, quorum ceph2,ceph3,ceph4 mgr: ceph4(active), standbys: ceph3, ceph2 mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby osd: 9 osds: 8 up, 8 in rbd-mirror: 1 daemon active data: pools: 13 pools, 504 pgs objects: 221 objects, 241 MB usage: 1587 MB used, 118 GB / 119 GB avail pgs: 126/663 objects degraded (19.005%) 480 active clean 23 active recovery_wait degraded 1 active recovering degraded io: client: 4196 B/s rd, 0 B/s wr, 3 op/s rd, 0 op/s wr recovery: 2873 kB/s, 0 keys/s, 2 objects/s

[root@ceph2 ceph]# ceph osd crush rm osd.0

removed item id 0 name 'osd.0' from crush map

[root@ceph2 ceph]# ceph auth list|grep osd.0

installed auth entries: osd.0

[root@ceph2 ceph]# ceph auth rm osd.0


[root@ceph2 ceph]# ceph -s

cluster: id: 35a91e48-8244-4e96-a7ee-980ab989d20d health: HEALTH_OK services: mon: 3 daemons, quorum ceph2,ceph3,ceph4 mgr: ceph4(active), standbys: ceph3, ceph2 mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby osd: 9 osds: 8 up, 8 in rbd-mirror: 1 daemon active data: pools: 13 pools, 504 pgs objects: 221 objects, 241 MB usage: 1656 MB used, 118 GB / 119 GB avail pgs: 504 active clean io: client: 0 B/s wr, 0 op/s rd, 0 op/s wr

[root@ceph2 ceph]# ceph osd rm osd.0

removed osd.0

[root@ceph2 ceph]# ceph -s

cluster: id: 35a91e48-8244-4e96-a7ee-980ab989d20d health: HEALTH_OK services: mon: 3 daemons, quorum ceph2,ceph3,ceph4 mgr: ceph4(active), standbys: ceph3, ceph2 mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby osd: 8 osds: 8 up, 8 in rbd-mirror: 1 daemon active data: pools: 13 pools, 504 pgs objects: 221 objects, 241 MB usage: 1656 MB used, 118 GB / 119 GB avail pgs: 504 active clean io: client: 5321 B/s rd, 0 B/s wr, 5 op/s rd, 0 op/s wr


ceph osd out osd.3
systemctl stop ceph-osd@3
ceph osd purge osd.3 #综合这一步,就可以完成操作
删除配置文件中针对该osd的配置




[root@ceph2 ceph-0]# ceph osd create 0 [root@ceph2 ceph-0]# ceph-authtool --create-keyring /etc/ceph/ceph.osd.0.keyring --gen-key -n osd.0 --cap mon 'allow profile osd' --cap mgr 'allow profile osd' --cap osd 'allow *' creating /etc/ceph/ceph.osd.0.keyring [root@ceph2 ceph-0]# ceph auth import -i /etc/ceph/ceph.osd.0.keyring imported keyring [root@ceph2 ceph-0]# ceph auth get-or-create osd.0 -o /var/lib/ceph/osd/ceph-0/keyring [root@ceph2 ceph-0]# ceph-osd -i 0 --mkfs --cluster ceph 2019-03-29 07:57:58.928076 7f564d51fd00 -1 created object store /var/lib/ceph/osd/ceph-0 for osd.0 fsid 35a91e48-8244-4e96-a7ee-980ab989d20d [root@ceph2 ceph-0]# cd /var/lib/ceph/osd/ceph-0 [root@ceph2 ceph-0]# rm -f journal [root@ceph2 ceph-0]# partuuid_0=`blkid /dev/vdb1|awk -F "[\"\"]" '{print $8}'` [root@ceph2 ceph-0]# echo $partuuid_0 745dce53-1c63-4c50-b434-d441038dafe4 [root@ceph2 ceph-0]# ln -s /dev/disk/by-partuuid/$partuuid_0 ./journal [root@ceph2 ceph-0]# ll total 64 -rw-r--r-- 1 root root 393 Mar 16 12:46 activate.monmap -rw-r--r-- 1 ceph ceph 3 Mar 16 12:46 active -rw-r--r-- 1 ceph ceph 37 Mar 16 12:46 ceph_fsid drwxr-xr-x 344 ceph ceph 12288 Mar 28 10:40 current -rw-r--r-- 1 ceph ceph 37 Mar 16 12:46 fsid lrwxrwxrwx 1 root root 58 Mar 29 07:59 journal -> /dev/disk/by-partuuid/745dce53-1c63-4c50-b434-d441038dafe4 -rw-r--r-- 1 ceph ceph 37 Mar 16 12:46 journal_uuid -rw------- 1 ceph ceph 56 Mar 29 07:57 keyring -rw-r--r-- 1 ceph ceph 21 Mar 16 12:46 magic -rw-r--r-- 1 ceph ceph 6 Mar 16 12:46 ready -rw-r--r-- 1 ceph ceph 4 Mar 16 12:46 store_version -rw-r--r-- 1 ceph ceph 53 Mar 16 12:46 superblock -rw-r--r-- 1 ceph ceph 0 Mar 16 12:47 systemd -rw-r--r-- 1 ceph ceph 10 Mar 16 12:46 type -rw-r--r-- 1 ceph ceph 2 Mar 16 12:46 whoami [root@ceph2 ceph-0]# chown ceph.ceph -R /var/lib/ceph [root@ceph2 ceph-0]# ceph-osd --mkjournal -i 0 --cluster ceph 2019-03-29 08:00:02.007442 7f416ec90d00 -1 journal read_header error decoding journal header 2019-03-29 08:00:02.018206 7f416ec90d00 
-1 created new journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0 [root@ceph2 ceph-0]# chown ceph.ceph /dev/disk/by-partuuid/$partuuid_0 [root@ceph2 ceph-0]# ceph osd crush add-bucket ceph2 host --cluster ceph bucket 'ceph2' already exists #不用创建bucket,在移除的时候,并没有移除主机的bucket [root@ceph2 ceph-0]# ceph osd crush move ceph2 root=default --cluster ceph #也不需要把ceph2添加到default的这个crushrule中 no need to move item id -3 name 'ceph2' to location {root=default} in crush map [root@ceph2 ceph-0]# ceph osd crush add osd.0 0.01500 root=default host=ceph2 add item id 0 name 'osd.0' weight 0.015 at location {host=ceph2,root=default} to crush map [root@ceph2 ceph-0]# systemctl start ceph-osd@0 [root@ceph2 ceph-0]# systemctl enable ceph-osd@0 [root@ceph2 ceph-0]# ps -ef|grep osd ceph 1147069 1 0 Mar28 ? 00:02:51 /usr/bin/ceph-osd -f --cluster ceph --id 6 --setuser ceph --setgroup ceph ceph 1147169 1 0 Mar28 ? 00:03:19 /usr/bin/ceph-osd -f --cluster ceph --id 3 --setuser ceph --setgroup ceph ceph 1220601 1 6 08:04 ? 
00:00:01 /usr/bin/ceph-osd -f --cluster ceph --id 0 --setuser ceph --setgroup ceph root 1220713 1156971 0 08:04 pts/0 00:00:00 grep --color=auto osd [root@ceph2 ceph-0]# ceph -s cluster: id: 35a91e48-8244-4e96-a7ee-980ab989d20d health: HEALTH_WARN 1/2481 objects misplaced (0.040%) Degraded data redundancy: 423/2481 objects degraded (17.050%), 8 pgs unclean, 16 pgs degraded #有数据正在重平衡 services: mon: 3 daemons, quorum ceph2,ceph3,ceph4 mgr: ceph4(active), standbys: ceph3, ceph2 mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby osd: 9 osds: 9 up, 9 in; 1 remapped pgs rbd-mirror: 1 daemon active data: pools: 13 pools, 504 pgs objects: 827 objects, 2206 MB usage: 6747 MB used, 128 GB / 134 GB avail pgs: 423/2481 objects degraded (17.050%) 1/2481 objects misplaced (0.040%) 487 active clean 14 active recovery_wait degraded 2 active recovering degraded 1 active remapped backfill_wait io: client: 4093 B/s rd, 0 B/s wr, 4 op/s rd, 0 op/s wr recovery: 20080 kB/s, 0 keys/s, 6 objects/s [root@ceph2 ceph-0]# ceph -s cluster: id: 35a91e48-8244-4e96-a7ee-980ab989d20d health: HEALTH_OK services: mon: 3 daemons, quorum ceph2,ceph3,ceph4 mgr: ceph4(active), standbys: ceph3, ceph2 mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby osd: 9 osds: 9 up, 9 in #恢复正常,有9个OSD,且集群处于健康状态 rbd-mirror: 1 daemon active data: pools: 13 pools, 504 pgs objects: 827 objects, 2206 MB usage: 6614 MB used, 128 GB / 134 GB avail pgs: 504 active clean io: client: 4093 B/s rd, 0 B/s wr, 4 op/s rd, 0 op/s wr

1.2 移除故障节点


1. 先移除节点上所有osd

2. ceph osd crush remove serverc


1. 先迁移节点上所有osd

2. 修改crushmap,删除所有与该节点相关的配置

1.3 恢复和回填OSD



osd_recovery_op_priority # 恢复操作相对于客户端操作的优先级,取值范围为1-63,默认为10;客户端操作的优先级由参数osd_client_op_priority控制,默认为63

osd_recovery_max_active # 每个osd一次处理的活跃恢复请求数量,默认为15,增大此值可加速恢复,但会增加集群负载

osd_recovery_threads # 用于数据恢复时的线程数,默认为1

osd_max_backfills # 单个osd的最大回填操作数,默认为10

osd_backfill_scan_min # 回填操作时最小扫描对象数量,默认为64

osd_backfill_scan_max # 回填操作的最大扫描对象数量,默认为512

osd_backfill_full_ratio # osd的占满率达到多少时,拒绝接受回填请求,默认为0.85

osd_backfill_retry_interval # 回填重试的时间间隔


二、 monitor管理

2.1 摘除monitor


[root@ceph2 ceph]# systemctl stop ceph-mon@ceph2


[root@ceph2 ceph]# ceph mon remove ceph2

removing mon.ceph2 at, there will be 2 monitors

[root@ceph2 ceph]# ceph -s

services: mon: 2 daemons, quorum ceph3,ceph4 mgr: ceph4(active), standbys: ceph2, ceph3 mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby osd: 9 osds: 9 up, 9 in rbd-mirror: 1 daemon active


[root@ceph2 ceph]# cd /var/lib/ceph/mon/
[root@ceph2 mon]# ls
ceph-ceph2
[root@ceph2 mon]# rm -rf ceph-ceph2/

2.2 添加monitor节点

[root@ceph2 mon]# cd


[root@ceph2 ~]# mkdir /var/lib/ceph/mon/ceph-ceph2

[root@ceph2 ~]# chown ceph.ceph -R !$

chown ceph.ceph -R /var/lib/ceph/mon/ceph-ceph2

[root@ceph2 ~]# ceph auth get mon.

exported keyring for mon. [mon.] key = AQDqfYxcAAAAABAAIc47ZLcYh013gzu3WWruew== caps mon = "allow *"

[root@ceph2 ~]# ceph auth get mon. -o /tmp/ceph.mon.keyring

exported keyring for mon.

[root@ceph2 ~]# cat /tmp/ceph.mon.keyring

[mon.] key = AQDqfYxcAAAAABAAIc47ZLcYh013gzu3WWruew== caps mon = "allow *"

[root@ceph2 ~]# ceph mon getmap -o /tmp/monmap.bin

got monmap epoch 2

[root@ceph2 ~]# monmaptool --print /tmp/monmap.bin

monmaptool: monmap file /tmp/monmap.bin epoch 2 fsid 35a91e48-8244-4e96-a7ee-980ab989d20d last_changed 2019-03-28 08:57:25.819243 created 2019-03-16 12:39:14.839999 0: mon.ceph3 1: mon.ceph4

[root@ceph2 ~]# sudo -u ceph ceph-mon -i ceph2 --mkfs --monmap /tmp/monmap.bin --keyring /tmp/ceph.mon.keyring

[root@ceph2 ~]# ll /var/lib/ceph/mon/ceph-ceph2/

total 8 -rw------- 1 ceph ceph 77 Mar 28 09:03 keyring -rw-r--r-- 1 ceph ceph 8 Mar 28 09:03 kv_backend drwxr-xr-x 2 ceph ceph 112 Mar 28 09:03 store.db

[root@ceph2 ~]# ps -ef |grep ceph-mon

root 1135665 1088603 0 09:12 pts/0 00:00:00 grep --color=auto ceph-mon

[root@ceph2 ~]# sudo -u ceph ceph-mon -i ceph2 --public-addr <ceph2的IP地址>

[root@ceph2 ~]# !ps

ps -ef |grep ceph-mon ceph 1135726 1 1 09:13 ? 00:00:00 ceph-mon -i ceph2 --public-addr root 1135771 1088603 0 09:13 pts/0 00:00:00 grep --color=auto ceph-mon

[root@ceph2 ~]# ceph -s

services: mon: 3 daemons, quorum ceph2,ceph3,ceph4 mgr: ceph4(active), standbys: ceph2, ceph3 mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby osd: 9 osds: 9 up, 9 in rbd-mirror: 1 daemon active

2.3 monitor故障排查

[root@ceph2 ~]# ceph daemon mon.ceph2 quorum_status

{ #查看monitor票数 "election_epoch": 128, "quorum": [ 0, 1, 2 ], "quorum_names": [ "ceph2", "ceph3", "ceph4" ], "quorum_leader_name": "ceph2", "monmap": { "epoch": 3, "fsid": "35a91e48-8244-4e96-a7ee-980ab989d20d", "modified": "2019-03-28 09:13:19.932456", "created": "2019-03-16 12:39:14.839999", "features": { "persistent": [ "kraken", "luminous" ], "optional": [] }, "mons": [ { "rank": 0, "name": "ceph2", "addr": "", "public_addr": "" }, { "rank": 1, "name": "ceph3", "addr": "", "public_addr": "" }, { "rank": 2, "name": "ceph4", "addr": "", "public_addr": "" } ] } }

2.4 利用admin sockets管理守护进程

通过admin sockets,管理员可以直接与守护进程交互。如查看和修改守护进程的配置参数。


基于admin sockets的操作:

ceph daemon $type.$id command

或者ceph --admin-daemon /var/run/ceph/$cluster-$type.$id.asok command



config get parameter

config set parameter value

config show

perf dump


[root@ceph2 ceph]# ceph daemon osd.6 config show|grep osd_default

"osd_default_data_pool_replay_window": "45", "osd_default_notify_timeout": "30",

[root@ceph2 ceph]# ceph daemon osd.6 config get xio_mp_max_64

{ "xio_mp_max_64": "65536" }


[root@ceph2 ceph]# ceph tell osd.* injectargs --xio_mp_max_64 65536

osd.1: xio_mp_max_64 = '65536' (not observed, change may require restart) osd.2: xio_mp_max_64 = '65536' (not observed, change may require restart) osd.3: xio_mp_max_64 = '65536' (not observed, change may require restart) osd.4: xio_mp_max_64 = '65536' (not observed, change may require restart) osd.5: xio_mp_max_64 = '65536' (not observed, change may require restart) osd.6: xio_mp_max_64 = '65536' (not observed, change may require restart) osd.7: xio_mp_max_64 = '65536' (not observed, change may require restart) osd.8: xio_mp_max_64 = '65536' (not observed, change may require restart)


