Ceph-rook metrics missing from monitoring dashboards
Automation Suite Installation Guide
Last updated November 4, 2024
If Ceph-rook metrics are missing from the monitoring dashboards, the CephMgrIsAbsent alert can usually be cleared by failing the active Ceph manager over to the standby manager "a". To do so, run the following script:
#!/bin/bash
set -euo pipefail

# Make kubectl available
export KUBECONFIG="/etc/rancher/rke2/rke2.yaml"
export PATH="$PATH:/var/lib/rancher/rke2/bin"

function clearCephMgrAlert() {
  local ceph_status
  local active_ceph_mgr

  # Check the current rook-ceph cluster status
  ceph_status=$(kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph status --format json-pretty | jq -r '.health.status')
  if [[ "${ceph_status}" != "HEALTH_OK" ]]; then
    echo "Error: Your rook-ceph cluster is not healthy. Please review your environment."
    return 1
  fi

  # Find the currently active mgr; fail over only if it is not "a".
  # Note: with "set -e" in effect, a failed kubectl command must be tested
  # directly in the "if" condition; checking $? afterwards would never run.
  active_ceph_mgr=$(kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph mgr dump | jq -r '.active_name')
  if [[ "${active_ceph_mgr}" != "a" ]]; then
    echo "Currently your active rook-ceph-mgr is ${active_ceph_mgr}. Failing over to the standby mgr a to fix the CephMgrIsAbsent alert."
    if ! kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph mgr fail "${active_ceph_mgr}"; then
      echo "Error: Failed to fail over to the standby mgr a. Please manually check your ceph status."
      return 1
    fi
  fi
  echo "Your active ceph-mgr should now be failed over to a. Please wait several minutes and ensure the CephMgrIsAbsent alert is cleared."
  return 0
}

clearCephMgrAlert
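The script's safety gate can be illustrated in isolation. The sketch below runs the same `jq` extraction the script performs, but against a hardcoded sample of the JSON shape that `ceph status --format json-pretty` returns (the sample value is illustrative, not captured from a real cluster):

```shell
# Illustrative sample of ceph status output; on a live cluster this comes from:
#   kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph status --format json-pretty
sample='{"health":{"status":"HEALTH_OK"}}'

# Extract the overall health flag, as the script does with jq
status=$(echo "$sample" | jq -r '.health.status')

# The mgr failover is only attempted when the cluster reports HEALTH_OK
if [[ "$status" == "HEALTH_OK" ]]; then
  echo "cluster healthy: safe to fail over the mgr"
else
  echo "cluster not healthy: investigate before failing over"
fi
```

Failing over a Ceph manager on a degraded cluster could mask a deeper problem, which is why the script refuses to proceed on anything other than `HEALTH_OK`.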