# --- Cluster overview / inventory ---
ceph -v                   # short version string
ceph -s                   # cluster summary (same as 'ceph status')
ceph df                   # global and per-pool space usage
ceph status
ceph health detail        # expand HEALTH_WARN/HEALTH_ERR causes
ceph df detail            # adds objects, quota and raw-used columns
ceph osd lspools          # pool id -> name list
ceph osd pool ls detail   # pool flags, size, pg_num, application tags
# replication size / count
# WARNING: min_size 1 lets the pool accept I/O with a single surviving
# replica — risks data loss on the next failure; use only deliberately.
ceph osd pool set os-dev_glance min_size 1
ceph osd pool set os-dev_glance size 2
ceph osd pool get os-dev_glance size
# disable rebuild (noout: down OSDs are not marked out, so no rebalancing
# starts during planned maintenance)
ceph osd set noout 
# enable rebuild
ceph osd unset noout
# throttle recovery/backfill impact; defaults are 1 3 3
# (osd_max_backfills / osd_recovery_max_active / osd_recovery_op_priority)
ceph tell 'osd.*' injectargs '--osd_max_backfills 1 --osd_recovery_max_active 1 --osd_recovery_op_priority 3'
# health — repairing a scrub-inconsistent PG
ceph health detail 
ceph --version
# list the inconsistent objects of PG 6.115 (PG id comes from 'health detail')
rados list-inconsistent-obj 6.115 --format=json-pretty
ceph pg repair 6.115
ceph -w                   # watch the repair progress live
# erasure-coded pools
ceph osd erasure-code-profile ls
ceph osd pool create <name> <pg> erasure <profile>
ceph health
# watch cluster events live
ceph -w
# re-enable recovery / backfill / rebalancing after maintenance
ceph osd unset norecover
ceph osd unset nobackfill
ceph osd unset noout
ceph health
# get cluster's FSID/UUID
ceph fsid
# additional commands
# change default replica count for NEW pools on all MONs (runtime only)
ceph tell mon.\* injectargs '--osd_pool_default_size=2'
# limit how much data the balancer may move at once (1%)
ceph config set mgr target_max_misplaced_ratio .01
# required before enabling upmap balancing (clients must be >= luminous)
ceph osd set-require-min-compat-client luminous


# expose metrics for Prometheus scraping (mgr endpoint, default port 9283)
ceph mgr module enable prometheus


# automatic PG balancing via pg-upmap entries
ceph mgr module enable balancer
ceph balancer mode upmap
ceph balancer on

# --- rados / pool ---

rados df


# Build nagios-plugins-ceph from source and install the .deb
sudo apt-get install -y devscripts fakeroot build-essential dh-python
git clone https://github.com/ceph/ceph-nagios-plugins.git
cd ceph-nagios-plugins
make deb
sudo dpkg -i nagios-plugins-ceph_*_all.deb
# create wrapper for kolla-ansible installation: forward 'ceph ...' into the
# ceph_mon container.  Quoted 'EOF' keeps $@ literal in the written file
# (unquoted it expanded at write time); '-i' without '-t' so the wrapper
# also works without a TTY (cron, nagios checks).
cat <<'EOF' > /usr/bin/ceph
#!/bin/bash
exec docker exec -i ceph_mon ceph "$@"
EOF
chmod +x /usr/bin/ceph

# --- Replace OSD ---

ceph -s
ceph osd status
ceph osd df tree          # usage per OSD, grouped by CRUSH tree
# ssh ceph1-osd8-dev      # inspect the failed OSD on its host
systemctl status ceph-osd@32
ls -la /var/lib/ceph/osd/ceph-32
# check that the data/journal partition symlinks still resolve
ls -l /dev/disk/by-partuuid/c8af71de-f5ae-4f62-ab88-8c9aa30c0f0c
ls -l /dev/disk/by-partuuid/b03b6a29-94d0-4a6e-a740-5dabaa144231
ceph -w
# Remove OSD (DeepSea/salt-managed cluster)
ssh ceph1-admin1-dev
salt-run disengage.safety
salt-run remove.osd 32

# --- Upgrading Ceph from Luminous to Nautilus ---


# Map ST8000 physical disks (lsscsi) to the OSDs running on this host.
# NOTE(review): the original paste was garbled — awk used '=' (assignment)
# instead of '~' (regex match), the for-loop was missing 'do', a stray
# 'ceph fsid' replaced the loop header, and the body referenced an
# undefined $i.  Reconstructed below; the bluestore 'block' symlink path
# is assumed — verify it matches your deployment.
join -a1 -j6 <(
     lsscsi -g | awk '$4 ~ /ST8000NM0055-1RM/ {
         cmd = "find -L /dev/disk/by-id -samefile "$6" -name \"scsi-SATA_ST8000NM0055-1RM*\""
         cmd | getline result
         print $0,result}') <(
     for OSD in $(ceph osd ls-tree "$(hostname)"); do
         i=/var/lib/ceph/osd/ceph-${OSD}/block
         DISK=$(readlink -f "$i" | sed 's%[0-9]\+$%%; s%\(nvme[0-9]n1\)p%\1%')
         echo "osd.${OSD} is on $(readlink -f "$i") = $DISK"
      done | LC_ALL=C sort -k4) |
      column -t

# --- Delete pool ---

# temporarily allow pool deletion on all MONs, delete, then lock it again
ceph tell mon.\* injectargs '--mon-allow-pool-delete=true'
ceph osd pool delete gnocchi gnocchi --yes-i-really-really-mean-it
ceph tell mon.\* injectargs '--mon-allow-pool-delete=false'

# --- Fix 'too many PGs per OSD' warning (Luminous) ---

# Add to ceph.conf, then restart all MONs and OSDs, one by one:
#   mon_max_pg_per_osd = 900
#   osd max pg per osd hard ratio = 8
#   mon allow pool delete = true
# verify the running values via the daemon admin sockets ($id = mon/osd id;
# original used '{$id}', which is a literal string in shell, not an expansion)
ceph --admin-daemon /var/run/ceph/ceph-mon.$id.asok config get mon_max_pg_per_osd
ceph --admin-daemon /var/run/ceph/ceph-osd.$id.asok config get osd_max_pg_per_osd_hard_ratio
# shrink a pool's pg_num by copying it into a new pool with fewer PGs
rados lspools
ceph osd pool get .users.email pg_num
ceph osd pool create .users.email.new 8
# copy into the pool created above — the original command copied into
# 'default.rgw.lc.new' (stale paste), which would have lost the data on delete
rados cppool .users.email .users.email.new
ceph osd pool delete .users.email .users.email --yes-i-really-really-mean-it
ceph osd pool rename .users.email.new .users.email
ceph osd pool application enable .users.email rgw

# --- Client packages ---

sudo apt-get install -y python3-rbd ceph-common ceph-fuse

# --- Remove node from cluster ---

# shrink cluster (ceph-ansible infrastructure playbooks):
export TARGET_NODE=ceph1-dev
ceph osd out $(ceph osd ls-tree $TARGET_NODE)
# osd_to_kill must be a single comma-separated value; 'ceph osd ls-tree'
# prints one id per line, so join them (the original word-split into
# multiple arguments when the node held more than one OSD)
ansible-playbook -i hosts.yml --limit "$TARGET_NODE" ./infrastructure-playbooks/shrink-osd.yml -e osd_to_kill=$(ceph osd ls-tree "$TARGET_NODE" | paste -sd, -) -e ireallymeanit=yes
ceph mon remove $TARGET_NODE
ansible-playbook -i hosts.yml --limit $TARGET_NODE ./infrastructure-playbooks/shrink-mon.yml -e mon_to_kill=$TARGET_NODE -e ireallymeanit=yes
#ansible-playbook -i hosts.yml --limit $TARGET_NODE ./infrastructure-playbooks/shrink-rgw.yml -e rgw_to_kill=$TARGET_NODE -e ireallymeanit=yes
ansible-playbook -i hosts.yml --limit $TARGET_NODE ./infrastructure-playbooks/purge-container-cluster.yml -e ireallymeanit=yes
# query a MON directly with its keyring ('--format json' was stranded on
# its own line in the original; it belongs to this command)
ceph --cluster ceph-cl1 -n mon -k /var/lib/ceph/mon/ceph-cl1-ctl1-dev/keyring mon_status --format json


# list / clean up leftover 'rados bench' objects in a pool
rados -p os-dev_glance ls
rados -p os-dev_glance cleanup --prefix benchmark_data


# enable bluestore on-the-fly compression for a pool
ceph osd pool set os-dev_glance compression_algorithm snappy
ceph osd pool set os-dev_glance compression_mode aggressive
ceph df detail            # USED vs. RAW USED shows the compression effect


# 10-second write benchmark against the pool
rados bench -p dev_glance 10 write


# compact a MON's RocksDB store (frees disk space on the MON host)
ceph tell mon.ceph3-lab compact
ceph config show mon.ceph1-lab | grep mon_data_avail_warn
# lower the MON data-disk free-space warning threshold to 10%
ceph tell mon.\* injectargs '--mon_data_avail_warn=10'
# change / import key: export, edit caps in the file, re-import
ceph auth export client.cinder > /tmp/client.cinder
# vi /tmp/client.cinder
ceph auth import -i /tmp/client.cinder

# --- Increase bucket count per user ---

# show the current bucket quota of every RGW user
RGW_USERS=$(radosgw-admin user list | jq -r ".[]")
for RGW_USER in ${RGW_USERS}; do
    radosgw-admin user info --uid=${RGW_USER} | grep max_buckets
done
# raise the bucket limit for a single user
radosgw-admin user modify --uid=ae4452f43feb877bc967aec20778e7ab --max-buckets=10000
ceph osd df
# browse an (offline) OSD's objects as a filesystem via FUSE
# (command name was misspelled 'ceph-objectsotre-tool' in the original)
ceph-objectstore-tool --op fuse --data-path /var/lib/ceph/osd/ceph-75 --mountpoint /mnt/foo
# --- RBD image & snapshot reference (placeholders in <>) ---
# original had many typos: 'rdb' for 'rbd', 'showmppped', '--image format 2'
# (missing hyphen), '--pol', 'protec', malformed placeholders — all fixed.
rbd create -p <poolname> <rbdimagename> --size <xGB> --image-format 2
rbd list
rbd info <poolname>/<imagename>
rbd map -p <poolname> <rbdimagename>
rbd showmapped
qemu-img create -f raw rbd:poolname/imagename size
qemu-img resize
qemu-img info
# snapshots
rbd snap ls <poolname>/<imagename>
rbd snap rollback <poolname>/<image-name>@<snapname>
rbd snap rm <poolname>/<image-name>@<snapname>
rbd snap purge <poolname>/<imagename>
rbd snap --pool <poolname> --image <imagename> info
rbd snap protect <poolname>/<image-name>@<snapname>
rbd clone <sourcepool>/<sourceimage>@<snapshot> <destinationpool>/<destinationimage>
rbd flatten <pool>/<imagename>
# --- worked example: create, map, format and mount an RBD image ---
# (original had two commands fused on one line, 'rdb'/'sbd' for 'rbd',
# 'block-pol'/'block-storeage' misspellings, and '/dev/rdb0' at mount)
rbd -p block-pool ls
ceph osd lspools
rbd create -p block-pool block-storage --size 1204   # size in MB; NOTE(review): 1204 looks like a typo for 1024 — confirm
rbd list block-pool
rbd -p block-pool info block-storage
rbd map -p block-pool --image block-storage
rbd showmapped
ls /dev/rbd*
fdisk -l /dev/rbd0
mkfs.ext4 /dev/rbd0
mkdir /mnt/ses5
mount /dev/rbd0 /mnt/ses5