From 73958f43336adefdc24c595a35e2074cf9d88155 Mon Sep 17 00:00:00 2001 From: TsMask <340112800@qq.com> Date: Mon, 12 May 2025 19:07:42 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=9B=B4=E6=96=B0=E5=91=8A=E8=AD=A6?= =?UTF-8?q?=E5=8F=82=E6=95=B0=E6=8F=8F=E8=BF=B0=EF=BC=8C=E4=BF=AE=E6=AD=A3?= =?UTF-8?q?CPU=E4=BD=BF=E7=94=A8=E7=8E=87=E8=AE=A1=E7=AE=97=E9=80=BB?= =?UTF-8?q?=E8=BE=91,=20license=E6=A3=80=E6=9F=A5=E5=A4=A9=E6=95=B0?= =?UTF-8?q?=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build/database/lite/install/sys_i18n.sql | 4 ++-- build/database/std/install/sys_i18n.sql | 2 +- .../ne_alarm_state_check_cmd.go | 19 ++++++++++++++----- .../ne_alarm_state_check_license.go | 9 ++++++++- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/build/database/lite/install/sys_i18n.sql b/build/database/lite/install/sys_i18n.sql index 2d9c4fe9..fd6e1663 100644 --- a/build/database/lite/install/sys_i18n.sql +++ b/build/database/lite/install/sys_i18n.sql @@ -873,7 +873,7 @@ Minor=3 Warning=4 AddInfo: 告警补充信息 -cpuUseGt: CPU使用率大于, 范围0~100% +cpuUseGt: CPU使用率大于, 范围0~100*CPU核心数 memUseGt: 内存使用率大于, 范围0~100% diskUseGt: 磁盘使用率大于, 范围0~100%', 'Checks the memory/CPU/disk check health of the network element and sends alerts when thresholds are crossed. @@ -891,7 +891,7 @@ Minor=3 Warning=4 AddInfo: Additional information on alarms -cpuUseGt: CPU utilization greater than, range 0 to 100% +cpuUseGt: CPU utilization is greater than, range 0~100*number of CPU cores memUseGt: Memory utilization greater than, range 0 to 100% diskUseGt: Disk utilization greater than, range 0 to 100%'); INSERT INTO "sys_i18n" VALUES (754, 'job.ne_alarm_state_check_license', '网元告警-License到期检查', 'NE Alarm-License Expire Check'); diff --git a/build/database/std/install/sys_i18n.sql b/build/database/std/install/sys_i18n.sql index 3a82c2f4..7d91036f 100644 --- a/build/database/std/install/sys_i18n.sql +++ b/build/database/std/install/sys_i18n.sql @@ -767,7 +767,7 @@ INSERT INTO `sys_i18n` VALUES (749, 'dictType.cdr_sip_code_cause', 'IMS-Voice-SI INSERT INTO `sys_i18n` VALUES (750, 'job.backup_export_log', '备份-日志数据定期导出', 'Backup-Periodic export of Log Data'); INSERT INTO `sys_i18n` VALUES (751, 'job.backup_export_log_remark', 'dataType: 类型支持 operate/login\nfileType: 文件类型 csv/xlsx\nhour: 数据时间从任务执行时间前的小时数', 'Backup-Periodic export of dataType: type support operate/login\nfileType: file type csv/xlsx\nhour: data time from the hour before the task execution time'); INSERT INTO `sys_i18n` VALUES (752, 'job.ne_alarm_state_check_cmd', '网元告警-内存/CPU/磁盘检查', 'NE Alarm-Memory/CPU/Disk Checks'); -INSERT INTO `sys_i18n` VALUES (753, 'job.ne_alarm_state_check_cmd_remark', '检查网元的内存/CPU/磁盘检查健康状况,在出现过阈值时发出警报。\r\n\r\nAlarm type:\r\nCommunicationAlarm=1\r\nEquipmentAlarm=2\r\nProcessingFailure=3\r\nEnvironmentalAlarm=4\r\nQualityOfServiceAlarm=5\r\n\r\nSeverity:\r\nCritical=1\r\nMajor=2\r\nMinor=3\r\nWarning=4\r\n\r\nAddInfo: 告警补充信息\r\ncpuUseGt: CPU使用率大于, 范围0~100%\r\nmemUseGt: 内存使用率大于, 范围0~100%\r\ndiskUseGt: 磁盘使用率大于, 范围0~100%', 'Checks the memory/CPU/disk check health of the network element and sends alerts when thresholds are crossed.\n\nAlarm type:\nCommunicationAlarm=1\nEquipmentAlarm=2\nProcessingFailure=3\nEnvironmentalAlarm=4\nQualityOfServiceAlarm=5\n\nSeverity:\nCritical=1\nMajor=2\nMinor=3\nWarning=4\r\n\r\nAddInfo: Additional information on alarms\r\ncpuUseGt: CPU utilization greater than, range 0 to 100%\r\nmemUseGt: Memory utilization greater than, range 0 to 100%\r\ndiskUseGt: Disk utilization greater than, range 0 to 100%'); +INSERT INTO `sys_i18n` VALUES (753, 'job.ne_alarm_state_check_cmd_remark', '检查网元的内存/CPU/磁盘检查健康状况,在出现过阈值时发出警报。\r\n\r\nAlarm type:\r\nCommunicationAlarm=1\r\nEquipmentAlarm=2\r\nProcessingFailure=3\r\nEnvironmentalAlarm=4\r\nQualityOfServiceAlarm=5\r\n\r\nSeverity:\r\nCritical=1\r\nMajor=2\r\nMinor=3\r\nWarning=4\r\n\r\nAddInfo: 告警补充信息\r\ncpuUseGt: CPU使用率大于, 范围0~100*CPU核心数\r\nmemUseGt: 内存使用率大于, 范围0~100%\r\ndiskUseGt: 磁盘使用率大于, 范围0~100%', 'Checks the memory/CPU/disk check health of the network element and sends alerts when thresholds are crossed.\n\nAlarm type:\nCommunicationAlarm=1\nEquipmentAlarm=2\nProcessingFailure=3\nEnvironmentalAlarm=4\nQualityOfServiceAlarm=5\n\nSeverity:\nCritical=1\nMajor=2\nMinor=3\nWarning=4\r\n\r\nAddInfo: Additional information on alarms\r\ncpuUseGt: CPU utilization is greater than, range 0~100*number of CPU cores\r\nmemUseGt: Memory utilization greater than, range 0 to 100%\r\ndiskUseGt: Disk utilization greater than, range 0 to 100%'); INSERT INTO `sys_i18n` VALUES (754, 'job.ne_alarm_state_check_license', '网元告警-License到期检查', 'NE Alarm-License Expire Check'); INSERT INTO `sys_i18n` VALUES (755, 'job.ne_alarm_state_check_license_remark', '检查网元的License是否即将到期,在出现过阈值时发出警报。\r\n\r\nAlarm type:\r\nCommunicationAlarm=1\r\nEquipmentAlarm=2\r\nProcessingFailure=3\r\nEnvironmentalAlarm=4\r\nQualityOfServiceAlarm=5\r\n\r\nSeverity:\r\nCritical=1\r\nMajor=2\r\nMinor=3\r\nWarning=4\r\n\r\nAddInfo: 告警补充信息\r\ndayLt: 天数小于,默认30天', 'Checks if the network element is License is about to expire and sends an alert if a threshold is crossed.\n\nAlarm type:\nCommunicationAlarm=1\nEquipmentAlarm=2\nProcessingFailure=3\nEnvironmentalAlarm=4\nQualityOfServiceAlarm=5\n\nSeverity:\nCritical=1\nMajor=2\nMinor=3\nWarning=4\r\n\r\nAddInfo: Additional information on alarms\r\ndayLt: Days less than, default 30 days'); diff --git a/src/modules/crontask/processor/ne_alarm_state_check_cmd/ne_alarm_state_check_cmd.go b/src/modules/crontask/processor/ne_alarm_state_check_cmd/ne_alarm_state_check_cmd.go index 20017f66..12d38e85 100644 --- a/src/modules/crontask/processor/ne_alarm_state_check_cmd/ne_alarm_state_check_cmd.go +++ b/src/modules/crontask/processor/ne_alarm_state_check_cmd/ne_alarm_state_check_cmd.go @@ -3,6 +3,7 @@ package ne_alarm_state_check_cmd import ( "encoding/json" "fmt" + "runtime" "sort" "strconv" "strings" @@ -26,6 +27,8 @@ var NewProcessor = &NeAlarmStateCheckCMDProcessor{ alarmService: neDataService.NewAlarm, wsSendService: wsService.NewWSSend, count: 0, + triggerMax: 4, + triggerCount: 0, } // NeAlarmStateCheckCMDProcessor 网元告警内存/CPU/磁盘检查 @@ -36,7 +39,8 @@ type NeAlarmStateCheckCMDProcessor struct { alarmService *neDataService.Alarm // 告警信息服务 wsSendService *wsService.WSSend // ws发送服务 count int // 执行次数 - + triggerMax int // 阈值连续触发次数大于才会产生告警 + triggerCount int // 阈值连续触发次数 } // alarmParams 告警参数 @@ -47,7 +51,7 @@ type alarmParams struct { SpecificProblem string `json:"specificProblem"` // Alarm Cause: CPU/Menory/Disk status received from target NE reaches the threshold SpecificProblemID string `json:"specificProblemId"` // AC10100 AddInfo string `json:"addInfo"` // 告警补充信息 - CPUUseGt int64 `json:"cpuUseGt"` // CPU使用率大于, 范围0~100% + CPUUseGt int64 `json:"cpuUseGt"` // CPU使用率大于, 范围0~100*CPU核心数 MemUseGt int64 `json:"memUseGt"` // 内存使用率大于, 范围0~100% DiskUseGt int64 `json:"diskUseGt"` // 磁盘使用率大于, 范围0~100% @@ -72,8 +76,9 @@ func (s *NeAlarmStateCheckCMDProcessor) Execute(data any) (any, error) { return nil, fmt.Errorf("json params err: %v", err) } // 检查使用率 - if params.CPUUseGt > 100 || params.CPUUseGt < 0 { - return nil, fmt.Errorf("cpuUseGt must be between 0 and 100") + numCPU := runtime.NumCPU() + if params.CPUUseGt > int64(numCPU*100) || params.CPUUseGt < 0 { + return nil, fmt.Errorf("cpuUseGt must be between 0 and 100 * NumCPU") } if params.MemUseGt > 100 || params.MemUseGt < 0 { return nil, fmt.Errorf("memUseGt must be between 0 and 100") @@ -137,6 +142,7 @@ func (s *NeAlarmStateCheckCMDProcessor) Execute(data any) (any, error) { // 进行新增 newAlarm, err := s.alarmNew(neInfo, params) params.AddInfo = addInfo // 恢复附加信息 + s.triggerCount = 0 // 重置连续触发次数 if err != nil { result[neTypeAndId] = err.Error() continue @@ -227,7 +233,10 @@ func (s NeAlarmStateCheckCMDProcessor) serverState(state map[string]any, cpuUseG warnMsg = append(warnMsg, fmt.Sprintf("disk usage %.2f%%", sysDiskUsage)) } if len(warnMsg) > 0 { - return fmt.Errorf("greater than %s", strings.Join(warnMsg, ", ")) + s.triggerCount++ + if s.triggerCount > s.triggerMax { + return fmt.Errorf("greater than %s", strings.Join(warnMsg, ", ")) + } } return nil } diff --git a/src/modules/crontask/processor/ne_alarm_state_check_license/ne_alarm_state_check_license.go b/src/modules/crontask/processor/ne_alarm_state_check_license/ne_alarm_state_check_license.go index 57d6d5ae..371ec409 100644 --- a/src/modules/crontask/processor/ne_alarm_state_check_license/ne_alarm_state_check_license.go +++ b/src/modules/crontask/processor/ne_alarm_state_check_license/ne_alarm_state_check_license.go @@ -120,8 +120,15 @@ func (s *NeAlarmStateCheckLicenseProcessor) Execute(data any) (any, error) { } // 未记录 if alarmStatus == "" { + addInfo := params.AddInfo + if params.AddInfo != "" { + params.AddInfo = params.AddInfo + ", " + err.Error() + } else { + params.AddInfo = err.Error() + } // 进行新增 newAlarm, err := s.alarmNew(neInfo, params) + params.AddInfo = addInfo // 恢复附加信息 if err != nil { result[neTypeAndId] = err.Error() continue @@ -150,7 +157,7 @@ func (s NeAlarmStateCheckLicenseProcessor) serverState(state map[string]any, day } // 计算距离天数 - daysLeft := int64(time.Since(expireTime).Hours() / 24) + daysLeft := int64(time.Until(expireTime).Hours() / 24) if daysLeft < dayLt { return fmt.Errorf("license will expire in %d days", daysLeft) }