fix: 更新告警参数描述,修正CPU使用率计算逻辑, license检查天数错误
This commit is contained in:
@@ -873,7 +873,7 @@ Minor=3
|
|||||||
Warning=4
|
Warning=4
|
||||||
|
|
||||||
AddInfo: 告警补充信息
|
AddInfo: 告警补充信息
|
||||||
cpuUseGt: CPU使用率大于, 范围0~100%
|
cpuUseGt: CPU使用率大于, 范围0~100*CPU核心数
|
||||||
memUseGt: 内存使用率大于, 范围0~100%
|
memUseGt: 内存使用率大于, 范围0~100%
|
||||||
diskUseGt: 磁盘使用率大于, 范围0~100%', 'Checks the memory/CPU/disk check health of the network element and sends alerts when thresholds are crossed.
|
diskUseGt: 磁盘使用率大于, 范围0~100%', 'Checks the memory/CPU/disk check health of the network element and sends alerts when thresholds are crossed.
|
||||||
|
|
||||||
@@ -891,7 +891,7 @@ Minor=3
|
|||||||
Warning=4
|
Warning=4
|
||||||
|
|
||||||
AddInfo: Additional information on alarms
|
AddInfo: Additional information on alarms
|
||||||
cpuUseGt: CPU utilization greater than, range 0 to 100%
|
cpuUseGt: CPU utilization is greater than, range 0~100*number of CPU cores
|
||||||
memUseGt: Memory utilization greater than, range 0 to 100%
|
memUseGt: Memory utilization greater than, range 0 to 100%
|
||||||
diskUseGt: Disk utilization greater than, range 0 to 100%');
|
diskUseGt: Disk utilization greater than, range 0 to 100%');
|
||||||
INSERT INTO "sys_i18n" VALUES (754, 'job.ne_alarm_state_check_license', '网元告警-License到期检查', 'NE Alarm-License Expire Check');
|
INSERT INTO "sys_i18n" VALUES (754, 'job.ne_alarm_state_check_license', '网元告警-License到期检查', 'NE Alarm-License Expire Check');
|
||||||
|
|||||||
@@ -767,7 +767,7 @@ INSERT INTO `sys_i18n` VALUES (749, 'dictType.cdr_sip_code_cause', 'IMS-Voice-SI
|
|||||||
INSERT INTO `sys_i18n` VALUES (750, 'job.backup_export_log', '备份-日志数据定期导出', 'Backup-Periodic export of Log Data');
|
INSERT INTO `sys_i18n` VALUES (750, 'job.backup_export_log', '备份-日志数据定期导出', 'Backup-Periodic export of Log Data');
|
||||||
INSERT INTO `sys_i18n` VALUES (751, 'job.backup_export_log_remark', 'dataType: 类型支持 operate/login\nfileType: 文件类型 csv/xlsx\nhour: 数据时间从任务执行时间前的小时数', 'Backup-Periodic export of dataType: type support operate/login\nfileType: file type csv/xlsx\nhour: data time from the hour before the task execution time');
|
INSERT INTO `sys_i18n` VALUES (751, 'job.backup_export_log_remark', 'dataType: 类型支持 operate/login\nfileType: 文件类型 csv/xlsx\nhour: 数据时间从任务执行时间前的小时数', 'Backup-Periodic export of dataType: type support operate/login\nfileType: file type csv/xlsx\nhour: data time from the hour before the task execution time');
|
||||||
INSERT INTO `sys_i18n` VALUES (752, 'job.ne_alarm_state_check_cmd', '网元告警-内存/CPU/磁盘检查', 'NE Alarm-Memory/CPU/Disk Checks');
|
INSERT INTO `sys_i18n` VALUES (752, 'job.ne_alarm_state_check_cmd', '网元告警-内存/CPU/磁盘检查', 'NE Alarm-Memory/CPU/Disk Checks');
|
||||||
INSERT INTO `sys_i18n` VALUES (753, 'job.ne_alarm_state_check_cmd_remark', '检查网元的内存/CPU/磁盘检查健康状况,在出现过阈值时发出警报。\r\n\r\nAlarm type:\r\nCommunicationAlarm=1\r\nEquipmentAlarm=2\r\nProcessingFailure=3\r\nEnvironmentalAlarm=4\r\nQualityOfServiceAlarm=5\r\n\r\nSeverity:\r\nCritical=1\r\nMajor=2\r\nMinor=3\r\nWarning=4\r\n\r\nAddInfo: 告警补充信息\r\ncpuUseGt: CPU使用率大于, 范围0~100%\r\nmemUseGt: 内存使用率大于, 范围0~100%\r\ndiskUseGt: 磁盘使用率大于, 范围0~100%', 'Checks the memory/CPU/disk check health of the network element and sends alerts when thresholds are crossed.\n\nAlarm type:\nCommunicationAlarm=1\nEquipmentAlarm=2\nProcessingFailure=3\nEnvironmentalAlarm=4\nQualityOfServiceAlarm=5\n\nSeverity:\nCritical=1\nMajor=2\nMinor=3\nWarning=4\r\n\r\nAddInfo: Additional information on alarms\r\ncpuUseGt: CPU utilization greater than, range 0 to 100%\r\nmemUseGt: Memory utilization greater than, range 0 to 100%\r\ndiskUseGt: Disk utilization greater than, range 0 to 100%');
|
INSERT INTO `sys_i18n` VALUES (753, 'job.ne_alarm_state_check_cmd_remark', '检查网元的内存/CPU/磁盘检查健康状况,在出现过阈值时发出警报。\r\n\r\nAlarm type:\r\nCommunicationAlarm=1\r\nEquipmentAlarm=2\r\nProcessingFailure=3\r\nEnvironmentalAlarm=4\r\nQualityOfServiceAlarm=5\r\n\r\nSeverity:\r\nCritical=1\r\nMajor=2\r\nMinor=3\r\nWarning=4\r\n\r\nAddInfo: 告警补充信息\r\ncpuUseGt: CPU使用率大于, 范围0~100*CPU核心数\r\nmemUseGt: 内存使用率大于, 范围0~100%\r\ndiskUseGt: 磁盘使用率大于, 范围0~100%', 'Checks the memory/CPU/disk check health of the network element and sends alerts when thresholds are crossed.\n\nAlarm type:\nCommunicationAlarm=1\nEquipmentAlarm=2\nProcessingFailure=3\nEnvironmentalAlarm=4\nQualityOfServiceAlarm=5\n\nSeverity:\nCritical=1\nMajor=2\nMinor=3\nWarning=4\r\n\r\nAddInfo: Additional information on alarms\r\ncpuUseGt: CPU utilization is greater than, range 0~100*number of CPU cores\r\nmemUseGt: Memory utilization greater than, range 0 to 100%\r\ndiskUseGt: Disk utilization greater than, range 0 to 100%');
|
||||||
INSERT INTO `sys_i18n` VALUES (754, 'job.ne_alarm_state_check_license', '网元告警-License到期检查', 'NE Alarm-License Expire Check');
|
INSERT INTO `sys_i18n` VALUES (754, 'job.ne_alarm_state_check_license', '网元告警-License到期检查', 'NE Alarm-License Expire Check');
|
||||||
INSERT INTO `sys_i18n` VALUES (755, 'job.ne_alarm_state_check_license_remark', '检查网元的License是否即将到期,在出现过阈值时发出警报。\r\n\r\nAlarm type:\r\nCommunicationAlarm=1\r\nEquipmentAlarm=2\r\nProcessingFailure=3\r\nEnvironmentalAlarm=4\r\nQualityOfServiceAlarm=5\r\n\r\nSeverity:\r\nCritical=1\r\nMajor=2\r\nMinor=3\r\nWarning=4\r\n\r\nAddInfo: 告警补充信息\r\ndayLt: 天数小于,默认30天', 'Checks if the network element is License is about to expire and sends an alert if a threshold is crossed.\n\nAlarm type:\nCommunicationAlarm=1\nEquipmentAlarm=2\nProcessingFailure=3\nEnvironmentalAlarm=4\nQualityOfServiceAlarm=5\n\nSeverity:\nCritical=1\nMajor=2\nMinor=3\nWarning=4\r\n\r\nAddInfo: Additional information on alarms\r\ndayLt: Days less than, default 30 days');
|
INSERT INTO `sys_i18n` VALUES (755, 'job.ne_alarm_state_check_license_remark', '检查网元的License是否即将到期,在出现过阈值时发出警报。\r\n\r\nAlarm type:\r\nCommunicationAlarm=1\r\nEquipmentAlarm=2\r\nProcessingFailure=3\r\nEnvironmentalAlarm=4\r\nQualityOfServiceAlarm=5\r\n\r\nSeverity:\r\nCritical=1\r\nMajor=2\r\nMinor=3\r\nWarning=4\r\n\r\nAddInfo: 告警补充信息\r\ndayLt: 天数小于,默认30天', 'Checks if the network element is License is about to expire and sends an alert if a threshold is crossed.\n\nAlarm type:\nCommunicationAlarm=1\nEquipmentAlarm=2\nProcessingFailure=3\nEnvironmentalAlarm=4\nQualityOfServiceAlarm=5\n\nSeverity:\nCritical=1\nMajor=2\nMinor=3\nWarning=4\r\n\r\nAddInfo: Additional information on alarms\r\ndayLt: Days less than, default 30 days');
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package ne_alarm_state_check_cmd
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"runtime"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -26,6 +27,8 @@ var NewProcessor = &NeAlarmStateCheckCMDProcessor{
|
|||||||
alarmService: neDataService.NewAlarm,
|
alarmService: neDataService.NewAlarm,
|
||||||
wsSendService: wsService.NewWSSend,
|
wsSendService: wsService.NewWSSend,
|
||||||
count: 0,
|
count: 0,
|
||||||
|
triggerMax: 4,
|
||||||
|
triggerCount: 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
// NeAlarmStateCheckCMDProcessor 网元告警内存/CPU/磁盘检查
|
// NeAlarmStateCheckCMDProcessor 网元告警内存/CPU/磁盘检查
|
||||||
@@ -36,7 +39,8 @@ type NeAlarmStateCheckCMDProcessor struct {
|
|||||||
alarmService *neDataService.Alarm // 告警信息服务
|
alarmService *neDataService.Alarm // 告警信息服务
|
||||||
wsSendService *wsService.WSSend // ws发送服务
|
wsSendService *wsService.WSSend // ws发送服务
|
||||||
count int // 执行次数
|
count int // 执行次数
|
||||||
|
triggerMax int // 阈值连续触发次数大于才会产生告警
|
||||||
|
triggerCount int // 阈值连续触发次数
|
||||||
}
|
}
|
||||||
|
|
||||||
// alarmParams 告警参数
|
// alarmParams 告警参数
|
||||||
@@ -47,7 +51,7 @@ type alarmParams struct {
|
|||||||
SpecificProblem string `json:"specificProblem"` // Alarm Cause: CPU/Memory/Disk status received from target NE reaches the threshold
|
SpecificProblem string `json:"specificProblem"` // Alarm Cause: CPU/Memory/Disk status received from target NE reaches the threshold
|
||||||
SpecificProblemID string `json:"specificProblemId"` // AC10100
|
SpecificProblemID string `json:"specificProblemId"` // AC10100
|
||||||
AddInfo string `json:"addInfo"` // 告警补充信息
|
AddInfo string `json:"addInfo"` // 告警补充信息
|
||||||
CPUUseGt int64 `json:"cpuUseGt"` // CPU使用率大于, 范围0~100%
|
CPUUseGt int64 `json:"cpuUseGt"` // CPU使用率大于, 范围0~100*CPU核心数
|
||||||
MemUseGt int64 `json:"memUseGt"` // 内存使用率大于, 范围0~100%
|
MemUseGt int64 `json:"memUseGt"` // 内存使用率大于, 范围0~100%
|
||||||
DiskUseGt int64 `json:"diskUseGt"` // 磁盘使用率大于, 范围0~100%
|
DiskUseGt int64 `json:"diskUseGt"` // 磁盘使用率大于, 范围0~100%
|
||||||
|
|
||||||
@@ -72,8 +76,9 @@ func (s *NeAlarmStateCheckCMDProcessor) Execute(data any) (any, error) {
|
|||||||
return nil, fmt.Errorf("json params err: %v", err)
|
return nil, fmt.Errorf("json params err: %v", err)
|
||||||
}
|
}
|
||||||
// 检查使用率
|
// 检查使用率
|
||||||
if params.CPUUseGt > 100 || params.CPUUseGt < 0 {
|
numCPU := runtime.NumCPU()
|
||||||
return nil, fmt.Errorf("cpuUseGt must be between 0 and 100")
|
if params.CPUUseGt > int64(numCPU*100) || params.CPUUseGt < 0 {
|
||||||
|
return nil, fmt.Errorf("cpuUseGt must be between 0 and 100 * NumCPU")
|
||||||
}
|
}
|
||||||
if params.MemUseGt > 100 || params.MemUseGt < 0 {
|
if params.MemUseGt > 100 || params.MemUseGt < 0 {
|
||||||
return nil, fmt.Errorf("memUseGt must be between 0 and 100")
|
return nil, fmt.Errorf("memUseGt must be between 0 and 100")
|
||||||
@@ -137,6 +142,7 @@ func (s *NeAlarmStateCheckCMDProcessor) Execute(data any) (any, error) {
|
|||||||
// 进行新增
|
// 进行新增
|
||||||
newAlarm, err := s.alarmNew(neInfo, params)
|
newAlarm, err := s.alarmNew(neInfo, params)
|
||||||
params.AddInfo = addInfo // 恢复附加信息
|
params.AddInfo = addInfo // 恢复附加信息
|
||||||
|
s.triggerCount = 0 // 重置连续触发次数
|
||||||
if err != nil {
|
if err != nil {
|
||||||
result[neTypeAndId] = err.Error()
|
result[neTypeAndId] = err.Error()
|
||||||
continue
|
continue
|
||||||
@@ -227,7 +233,10 @@ func (s NeAlarmStateCheckCMDProcessor) serverState(state map[string]any, cpuUseG
|
|||||||
warnMsg = append(warnMsg, fmt.Sprintf("disk usage %.2f%%", sysDiskUsage))
|
warnMsg = append(warnMsg, fmt.Sprintf("disk usage %.2f%%", sysDiskUsage))
|
||||||
}
|
}
|
||||||
if len(warnMsg) > 0 {
|
if len(warnMsg) > 0 {
|
||||||
return fmt.Errorf("greater than %s", strings.Join(warnMsg, ", "))
|
s.triggerCount++
|
||||||
|
if s.triggerCount > s.triggerMax {
|
||||||
|
return fmt.Errorf("greater than %s", strings.Join(warnMsg, ", "))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -120,8 +120,15 @@ func (s *NeAlarmStateCheckLicenseProcessor) Execute(data any) (any, error) {
|
|||||||
}
|
}
|
||||||
// 未记录
|
// 未记录
|
||||||
if alarmStatus == "" {
|
if alarmStatus == "" {
|
||||||
|
addInfo := params.AddInfo
|
||||||
|
if params.AddInfo != "" {
|
||||||
|
params.AddInfo = params.AddInfo + ", " + err.Error()
|
||||||
|
} else {
|
||||||
|
params.AddInfo = err.Error()
|
||||||
|
}
|
||||||
// 进行新增
|
// 进行新增
|
||||||
newAlarm, err := s.alarmNew(neInfo, params)
|
newAlarm, err := s.alarmNew(neInfo, params)
|
||||||
|
params.AddInfo = addInfo // 恢复附加信息
|
||||||
if err != nil {
|
if err != nil {
|
||||||
result[neTypeAndId] = err.Error()
|
result[neTypeAndId] = err.Error()
|
||||||
continue
|
continue
|
||||||
@@ -150,7 +157,7 @@ func (s NeAlarmStateCheckLicenseProcessor) serverState(state map[string]any, day
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 计算距离天数
|
// 计算距离天数
|
||||||
daysLeft := int64(time.Since(expireTime).Hours() / 24)
|
daysLeft := int64(time.Until(expireTime).Hours() / 24)
|
||||||
if daysLeft < dayLt {
|
if daysLeft < dayLt {
|
||||||
return fmt.Errorf("license will expire in %d days", daysLeft)
|
return fmt.Errorf("license will expire in %d days", daysLeft)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user