OpenStack: compute (nova)

CLI
https://docs.openstack.org/nova/latest/admin/services.html

# list hypervisor details
openstack hypervisor list --long
 
# list VMs with availability zone
openstack server list --long -c ID -c Name -c Status -c Networks -c "Image Name" -c "Flavor Name" -c "Availability Zone"
 
# list VMs on all hypervisors
openstack server list --all --long  -c ID -c Name -c Host
 
# list VMs on specific hypervisor
openstack server list --all-projects --host ${COMPUTE_NODE}
 
# get VM count by hypervisor
openstack server list --all --long  -c Host -f value | sort | uniq -c
 
# list compute nodes
openstack compute service list --service nova-compute
 
# list compute service
openstack compute service list --host ${OS_NODE}
 
# add / enable compute service
openstack compute service set --enable com1-dev nova-compute
 
# disable compute service
# Disables every compute service binary that is registered on ${OS_NODE}.
# Service binary names contain no whitespace, so word-splitting the command
# substitution in the `for` list is intentional; all other expansions are quoted.
for OS_SERVICE in $(openstack compute service list --host "${OS_NODE}" -c Binary -f value); do
    openstack compute service set --disable --disable-reason "Maintenance" "${OS_NODE}" "${OS_SERVICE}"
done
 
# Search for servers with status ERROR
openstack server list --all --status ERROR
 
# Search for server with status resizing
openstack server list --all --status=VERIFY_RESIZE
 
# List instances / VMs
openstack server list
openstack server list -c ID -c Name -c Status -c Networks -c Host --long
 
# Show VM diagnostics / statistics
nova diagnostics ${SERVER_ID}
openstack server show --diagnostics ${SERVER_ID}
 
# show hypervisor usage
openstack usage list

Disable compute node

openstack compute service set --disable os-com2-dev nova-compute
openstack hypervisor list 
openstack compute service list --service nova-compute
openstack aggregate show <NAME>

Debug

# Search for server processes on wrong compute node
# For every nova-compute host, collect the instance UUIDs of the qemu processes
# running there (via ssh) and compare each one with the host that OpenStack has
# recorded for that server. Host names and UUIDs contain no whitespace, so
# word-splitting the command substitutions in the `for` lists is intentional.
for COMPUTE_NODE in $(openstack compute service list --service nova-compute -c Host -f value); do
    # Extract exactly one UUID-shaped token (36 hex/dash characters) after "-uuid ".
    # A greedy ".*" up to " -smbios" would swallow additional arguments when the
    # qemu command line carries more than one -smbios option, and would skip
    # processes that have none.
    for UUID in $(ssh "${COMPUTE_NODE}" pgrep -a qemu | grep -o -P '(?<=-uuid )[0-9a-fA-F-]{36}'); do
        # Empty when the server cannot be shown (the CLI error goes to stderr).
        VM_HOST=$(openstack server show -c "OS-EXT-SRV-ATTR:host" -f value "${UUID}")
        if [ -z "${VM_HOST}" ]; then
            echo "Server process ${UUID} on ${COMPUTE_NODE} not available in OpenStack"
        elif [ "${VM_HOST}" != "${COMPUTE_NODE}" ]; then
            echo "VM ${UUID} on wrong compute node ${COMPUTE_NODE}"
        fi
    done
done

Remove compute service / server

# list the VMs that are still placed on the node
openstack server list --all-projects --host "${NODE_ID}"
# list the compute services registered for the node
openstack compute service list --host "${NODE_ID}"
# `compute service delete` expects service IDs, not a host name, so resolve the
# IDs of the node's services first and delete them one by one.
for SERVICE_ID in $(openstack compute service list --host "${NODE_ID}" -c ID -f value); do
    openstack compute service delete "${SERVICE_ID}"
done

Manually rebalance VMs

# show hypervisor usage
openstack hypervisor list --long
 
# get processes that use swap
grep VmSwap /proc/*/status | grep -v " 0 kB"
 
# get VMs with high CPU usage
ssh compute-node-2
 
# VMs by CPU usage
# ps runs on ${COMPUTE_NODE} and prints pid, %cpu and the command line, sorted by
# descending %cpu. The rest of the pipeline runs locally: head keeps the first 5
# processes, grep keeps only lines containing "-uuid ... -smbios" and cuts each
# one off just before " -smbios", and awk prints pid, %cpu and the last remaining
# word (presumably the instance UUID that follows "-uuid").
ssh ${COMPUTE_NODE} ps -eo pid,%cpu,cmd --sort="-%cpu" --no-headers | head -5 | grep -o -P '^[0-9]?.*(?<=-uuid ).*(?= -smbios)\b' | awk '{ print $1,$2,$NF }'
 
# VMs by RAM usage
# ps runs on ${COMPUTE_NODE} and prints pid, the ps "size" column and the command
# line, sorted by descending size. The rest of the pipeline runs locally: head
# keeps the first 5 processes, grep keeps only lines containing
# "-uuid ... -smbios" and cuts each one off just before " -smbios", and awk prints
# pid, size and the last remaining word (presumably the instance UUID that
# follows "-uuid").
ssh ${COMPUTE_NODE} ps -eo pid,size,cmd --sort="-size" --no-headers | head -5 | grep -o -P '^[0-9]?.*(?<=-uuid ).*(?= -smbios)\b' | awk '{ print $1,$2,$NF }'
 
# show details of the VM selected for migration
# (variable is spelled SERVER_ID, matching the diagnostics commands in the CLI section)
openstack server show "${SERVER_ID}"

# live migrate VM to specific hypervisor
openstack server list --all --status ACTIVE --host comX-stage | grep large
# compute API microversion 2.30 is requested explicitly because --host is combined with --live-migration
openstack server migrate --os-compute-api-version 2.30 --live-migration --wait --host comX-stage "${SERVER_ID}"

evacuate
https://docs.openstack.org/nova/latest/admin/evacuate.html

openstack server list --all-projects --host com3-dev
openstack server set --state error 8041442a-9775-47c8-91be-e27286e731bd
nova evacuate 8041442a-9775-47c8-91be-e27286e731bd

aggregate

openstack aggregate list
openstack aggregate show 9
openstack aggregate add host 9 com10-stage

Add compute node

openstack compute service list
 
vi /etc/kolla/inventory
...
[external-compute]
new_compute_node_2
...
 
cd /etc/kolla/config/foo
kolla-ansible -i inventory deploy --limit comX-dev -e 'ansible_python_interpreter=/usr/bin/python3'

Remove compute node

COMPUTE_HOST=com1-dev
 
# ensure all VMs are migrated out from the compute node
openstack server list --all-projects --host ${COMPUTE_HOST}
 
# remove compute service
COMPUTE_SERVICE_ID=$(openstack compute service list --service nova-compute --host ${COMPUTE_HOST} -c ID -f value)
echo ${COMPUTE_SERVICE_ID}
openstack compute service delete ${COMPUTE_SERVICE_ID}
 
# remove network service
# Look up the network agents by ${COMPUTE_HOST}, the host variable this section
# sets and uses for the compute service lookup as well.
NETWORK_AGENT_ID=$(openstack network agent list --host "${COMPUTE_HOST}" -c ID -f value)
echo "${NETWORK_AGENT_ID}"
# Intentionally unquoted: the lookup can return several agent IDs (one per line)
# and each must be passed to `network agent delete` as a separate argument.
# shellcheck disable=SC2086
openstack network agent delete ${NETWORK_AGENT_ID}
 
# OPTIONAL: check no remaining resource_providers_allocations
http://www.panticz.de/openstack/resource-provider
 
# OPTIONAL: delete resource provider
openstack catalog list | grep placement
PLACEMENT_ENDPOINT=http://nova-placement.service.dev.i.example.com:8780
 
TOKEN=$(openstack token issue -f value -c id)
curl ${PLACEMENT_ENDPOINT}/resource_providers -H "x-auth-token: ${TOKEN}" | python -m json.tool
 
# delete resource provider
UUID=bf003af0-3541-4220-a5d5-c7c2e57abf22
curl ${PLACEMENT_ENDPOINT}/resource_providers/${UUID} -H "x-auth-token: $TOKEN" -X DELETE

Get CPU flags

# Take the first line of /proc/cpuinfo that contains "flags", keep the part
# after the colon and print the individual flags one per line, sorted.
grep -m 1 flags /proc/cpuinfo | cut -d ':' -f 2 | tr ' ' '\n' | sort

Get the VM's initial availability zone

mysql -u root -p"$DB_PASS" -h ctl1-stage.stage.i.example.com -D nova_api -e 'select json_extract(spec,"$.\"nova_object.data\".availability_zone") as availability_zone from request_specs where instance_uuid="78fe0882-c809-4ab7-99d4-920731fed315"\G' 

Get qemu version

docker exec -t nova_libvirt qemu-x86_64 -version

Remove deleted VM in nova DB

mysql
use nova;
select uuid, hostname, vm_state, task_state, power_state, deleted from instances where uuid="43699fce-3b64-41e5-bb82-9e8bc4f5eeda";
update instances set deleted=1, vm_state="deleted" where uuid="5345281e-eafb-4de7-9c36-9ff00fe8635d";