automation-suite
2022.10
false
- 概述
- 要求
- 安装
- 安装后
- 集群管理
- 监控和警示
- 迁移和升级
- 特定于产品的配置
- 最佳实践和维护
- 故障排除
- 无法获取沙盒映像
- Pod 未显示在 ArgoCD 用户界面中
- Redis 探测器失败
- RKE2 服务器无法启动
- 在 UiPath 命名空间中找不到密码
- ArgoCD 在首次安装后进入“进行中”状态
- 意外不一致;手动运行 fsck
- MongoDB Pod 处于 CrashLoopBackOff 状态或在删除后处于“等待 PVC 配置”状态
- MongoDB Pod 从 4.4.4-ent 升级到 5.0.7-ent 失败
- 集群还原或回滚后服务运行状况不佳
- Pod 在 Init:0/X 中卡住
- Prometheus 处于 CrashLoopBackoff 状态,并出现内存不足 (OOM) 错误
- 监控仪表板中缺少 Ceph-rook 指标
- Pod 无法在代理环境中与 FQDN 通信
- 使用 Automation Suite 诊断工具
- 使用 Automation Suite 支持包工具
- 探索日志
如何自动清理 Longhorn 快照
重要 :
请注意此内容已使用机器翻译进行了部分本地化。
Automation Suite 安装指南
Last updated 2024年11月4日
如何自动清理 Longhorn 快照
要自动清理快照,您必须使用以下文件创建 Cron 作业。要创建 Cron 作业,请运行
kubectl apply -f snapshot-cleanup.yaml
。您可以相应地更改 schedule
参数。默认情况下,此 Cron 作业将删除超过 10 天的快照。您可以通过更新 days
变量来缩短此时间。
apiVersion: batch/v1
kind: CronJob
metadata:
annotations:
labels:
job: uipath-snapshot-cleanup
name: lh-snapshot-cleanup
namespace: longhorn-system
spec:
concurrencyPolicy: Forbid
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 3
template:
metadata:
labels:
job: uipath-snapshot-cleanup
name: uipath-snapshot-cleanup
spec:
containers:
- command:
- sh
args:
- -ec
- |
url=$(kubectl get svc -n longhorn-system longhorn-backend -o json | jq -r '.spec | (.clusterIP|tostring) + ":" + (.ports[0].port|tostring)')
days=10
[[ -z "$url" ]] && echo "Missing longhorn URL" && exit 1
curl -s --connect-timeout 30 ${url}/v1 >> /dev/null || { echo "Unable to connect to longhorn backend"; exit 1; }
echo "Deleting snapshots older than $days days"
vols=$( (curl -s -X GET ${url}/v1/volumes |jq -r '.data[].name') )
function delete_snapshot() {
local vol=$1
local snap=$2
[[ -z "$vol" || -z "$snap" ]] && echo "Error: delete_snapshot: Empty argument" && return 1
curl -s -X POST ${url}/v1/volumes/${vol}?action=snapshotDelete -d '{"name": "'$snap'"}'
echo "Snapshot=$snap deleted for volume=$vol"
}
function cleanup_volume() {
local vol=$1
local deleted_snap=0
[[ -z "$vol" ]] && echo "Error: cleanup_volume: Empty argument" && return 1
# fetch list of snapshot
snaps=$( (curl -s -X POST ${url}/v1/volumes/${vol}?action=snapshotList | jq -r '.data[] | select(.usercreated==true) | .name' ) )
for i in ${snaps[@]}; do
if [[ $i == "volume-head" ]]; then
continue
fi
snapTime=$(curl -s -X POST ${url}/v1/volumes/${vol}?action=snapshotGet -d '{"name":"'$i'"}' |jq -r '.created')
currentTime=$(date "+%s")
timeDiff=$(($currentTime - ($(date -d $snapTime "+%s")) / 86400))
if [[ $timeDiff -lt $days ]]; then
echo "Ignoring snapshot $i, since it is older than $timeDiff days"
continue
fi
delete_snapshot $vol $i
deleted_snap=$((deleted_snap+1))
done
if [[ "$deleted_snap" -gt 0 ]]; then
curl -s -X POST ${url}/v1/volumes/${vol}?action=snapshotPurge >> /dev/null
fi
}
for i in ${vols[@]}; do
cleanup_volume $i
done
exit 0
env:
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
image: uipath/sf-k8-utils-rhel:0.9
imagePullPolicy: IfNotPresent
name: uipath-snapshot-cleanup
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
dnsPolicy: ClusterFirst
priorityClassName: system-cluster-critical
restartPolicy: OnFailure
schedulerName: default-scheduler
securityContext: {}
serviceAccount: longhorn-service-account
serviceAccountName: longhorn-service-account
terminationGracePeriodSeconds: 30
tolerations:
- effect: NoSchedule
key: kubernetes.io/hostname
operator: Equal
value: server0
schedule: '0 11 * * *'
successfulJobsHistoryLimit: 1
suspend: false
apiVersion: batch/v1
kind: CronJob
metadata:
annotations:
labels:
job: uipath-snapshot-cleanup
name: lh-snapshot-cleanup
namespace: longhorn-system
spec:
concurrencyPolicy: Forbid
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 3
template:
metadata:
labels:
job: uipath-snapshot-cleanup
name: uipath-snapshot-cleanup
spec:
containers:
- command:
- sh
args:
- -ec
- |
url=$(kubectl get svc -n longhorn-system longhorn-backend -o json | jq -r '.spec | (.clusterIP|tostring) + ":" + (.ports[0].port|tostring)')
days=10
[[ -z "$url" ]] && echo "Missing longhorn URL" && exit 1
curl -s --connect-timeout 30 ${url}/v1 >> /dev/null || { echo "Unable to connect to longhorn backend"; exit 1; }
echo "Deleting snapshots older than $days days"
vols=$( (curl -s -X GET ${url}/v1/volumes |jq -r '.data[].name') )
function delete_snapshot() {
local vol=$1
local snap=$2
[[ -z "$vol" || -z "$snap" ]] && echo "Error: delete_snapshot: Empty argument" && return 1
curl -s -X POST ${url}/v1/volumes/${vol}?action=snapshotDelete -d '{"name": "'$snap'"}'
echo "Snapshot=$snap deleted for volume=$vol"
}
function cleanup_volume() {
local vol=$1
local deleted_snap=0
[[ -z "$vol" ]] && echo "Error: cleanup_volume: Empty argument" && return 1
# fetch list of snapshot
snaps=$( (curl -s -X POST ${url}/v1/volumes/${vol}?action=snapshotList | jq -r '.data[] | select(.usercreated==true) | .name' ) )
for i in ${snaps[@]}; do
if [[ $i == "volume-head" ]]; then
continue
fi
snapTime=$(curl -s -X POST ${url}/v1/volumes/${vol}?action=snapshotGet -d '{"name":"'$i'"}' |jq -r '.created')
currentTime=$(date "+%s")
timeDiff=$(($currentTime - ($(date -d $snapTime "+%s")) / 86400))
if [[ $timeDiff -lt $days ]]; then
echo "Ignoring snapshot $i, since it is older than $timeDiff days"
continue
fi
delete_snapshot $vol $i
deleted_snap=$((deleted_snap+1))
done
if [[ "$deleted_snap" -gt 0 ]]; then
curl -s -X POST ${url}/v1/volumes/${vol}?action=snapshotPurge >> /dev/null
fi
}
for i in ${vols[@]}; do
cleanup_volume $i
done
exit 0
env:
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
image: uipath/sf-k8-utils-rhel:0.9
imagePullPolicy: IfNotPresent
name: uipath-snapshot-cleanup
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
dnsPolicy: ClusterFirst
priorityClassName: system-cluster-critical
restartPolicy: OnFailure
schedulerName: default-scheduler
securityContext: {}
serviceAccount: longhorn-service-account
serviceAccountName: longhorn-service-account
terminationGracePeriodSeconds: 30
tolerations:
- effect: NoSchedule
key: kubernetes.io/hostname
operator: Equal
value: server0
schedule: '0 11 * * *'
successfulJobsHistoryLimit: 1
suspend: false