diff --git a/features/fm/alarm.go b/features/fm/alarm.go index 7883ee1d..cb1d305f 100644 --- a/features/fm/alarm.go +++ b/features/fm/alarm.go @@ -15,6 +15,10 @@ import ( "be.ems/lib/global" "be.ems/lib/log" "be.ems/lib/services" + "be.ems/src/framework/constants" + neDataModel "be.ems/src/modules/network_data/model" + neDataService "be.ems/src/modules/network_data/service" + traceService "be.ems/src/modules/trace/service" "xorm.io/xorm" "github.com/go-resty/resty/v2" @@ -476,6 +480,23 @@ func PostAlarmFromNF(w http.ResponseWriter, r *http.Request) { log.Error("Failed to AlarmSMSForward:", err) } } + + // After the network element (NE) reboots, clear its active alarms + if alarmData.AlarmCode == constants.ALARM_EVENT_REBOOT { + alarmService := neDataService.NewAlarm + rows := alarmService.Find(neDataModel.Alarm{ + NeType: alarmData.NeType, + NeId: alarmData.NeId, + AlarmStatus: "1", + }) + for _, v := range rows { + alarmService.AlarmClearByIds([]string{v.ID}, "system") // one call per matched row; NOTE(review): any return value is not checked here + } + } + // After the NE reboots, existing trace tasks need their start request re-sent + if alarmData.AlarmCode == constants.ALARM_EVENT_REBOOT { + traceService.NewTraceTask.RunUnstopped() + } } services.ResponseStatusOK200Null(w) @@ -788,6 +809,23 @@ func GetAlarmFromNF(w http.ResponseWriter, r *http.Request) { log.Error("Failed to AlarmSMSForward:", err) } } + + // After the network element (NE) reboots, clear its active alarms + if alarmData.AlarmCode == constants.ALARM_EVENT_REBOOT { + alarmService := neDataService.NewAlarm + rows := alarmService.Find(neDataModel.Alarm{ + NeType: alarmData.NeType, + NeId: alarmData.NeId, + AlarmStatus: "1", + }) + for _, v := range rows { + alarmService.AlarmClearByIds([]string{v.ID}, "system") // one call per matched row; NOTE(review): any return value is not checked here + } + } + // After the NE reboots, existing trace tasks need their start request re-sent + if alarmData.AlarmCode == constants.ALARM_EVENT_REBOOT { + traceService.NewTraceTask.RunUnstopped() + } } log.Warn("Failed to insert alarm data:", err) } diff --git a/src/modules/crontask/processor/ne_alarm_state_check_cmd/ne_alarm_state_check_cmd.go b/src/modules/crontask/processor/ne_alarm_state_check_cmd/ne_alarm_state_check_cmd.go index 7604ee80..7d9971fc 100644 --- 
a/src/modules/crontask/processor/ne_alarm_state_check_cmd/ne_alarm_state_check_cmd.go +++ b/src/modules/crontask/processor/ne_alarm_state_check_cmd/ne_alarm_state_check_cmd.go @@ -3,6 +3,7 @@ package ne_alarm_state_check_cmd import ( "encoding/json" "fmt" + "runtime" "sort" "strconv" "strings" @@ -26,6 +27,8 @@ var NewProcessor = &NeAlarmStateCheckCMDProcessor{ alarmService: neDataService.NewAlarm, wsSendService: wsService.NewWSSend, count: 0, + triggerMax: 4, + triggerCount: 0, } // NeAlarmStateCheckCMDProcessor 网元告警内存/CPU/磁盘检查 @@ -36,7 +39,8 @@ type NeAlarmStateCheckCMDProcessor struct { alarmService *neDataService.Alarm // 告警信息服务 wsSendService *wsService.WSSend // ws发送服务 count int // 执行次数 - + triggerMax int // an alarm is produced only when the consecutive threshold-trigger count exceeds this value + triggerCount int // consecutive threshold-trigger count } // alarmParams 告警参数 @@ -47,7 +51,7 @@ type alarmParams struct { SpecificProblem string `json:"specificProblem"` // Alarm Cause: CPU/Menory/Disk status received from target NE reaches the threshold SpecificProblemID string `json:"specificProblemId"` // AC10100 AddInfo string `json:"addInfo"` // 告警补充信息 - CPUUseGt int64 `json:"cpuUseGt"` // CPU使用率大于, 范围0~100% + CPUUseGt int64 `json:"cpuUseGt"` // CPU usage greater than, range 0~100*number of CPU cores MemUseGt int64 `json:"memUseGt"` // 内存使用率大于, 范围0~100% DiskUseGt int64 `json:"diskUseGt"` // 磁盘使用率大于, 范围0~100% @@ -72,8 +76,9 @@ func (s *NeAlarmStateCheckCMDProcessor) Execute(data any) (any, error) { return nil, fmt.Errorf("json params err: %v", err) } // 检查使用率 - if params.CPUUseGt > 100 || params.CPUUseGt < 0 { - return nil, fmt.Errorf("cpuUseGt must be between 0 and 100") + numCPU := runtime.NumCPU() // NOTE(review): this counts the logical CPUs of the process running this check, not of the monitored NE — confirm that is the intended upper bound + if params.CPUUseGt > int64(numCPU*100) || params.CPUUseGt < 0 { + return nil, fmt.Errorf("cpuUseGt must be between 0 and 100 * NumCPU") } if params.MemUseGt > 100 || params.MemUseGt < 0 { return nil, fmt.Errorf("memUseGt must be between 0 and 100") @@ -137,6 +142,7 @@ func (s *NeAlarmStateCheckCMDProcessor) Execute(data any) (any, error) { // 进行新增 newAlarm, err := s.alarmNew(neInfo, params) 
params.AddInfo = addInfo // 恢复附加信息 + s.triggerCount = 0 // reset the consecutive-trigger count once a new alarm has been requested if err != nil { result[neTypeAndId] = err.Error() continue @@ -227,7 +233,10 @@ func (s NeAlarmStateCheckCMDProcessor) serverState(state map[string]any, cpuUseG warnMsg = append(warnMsg, fmt.Sprintf("disk usage %.2f%%", sysDiskUsage)) } if len(warnMsg) > 0 { - return fmt.Errorf("greater than %s", strings.Join(warnMsg, ", ")) + s.triggerCount++ // NOTE(review): the hunk header shows serverState declared with a value receiver, so this increment acts on a copy of the processor and would not persist between calls — confirm the receiver type + if s.triggerCount > s.triggerMax { + return fmt.Errorf("greater than %s", strings.Join(warnMsg, ", ")) + } } return nil } diff --git a/src/modules/crontask/processor/ne_alarm_state_check_license/ne_alarm_state_check_license.go b/src/modules/crontask/processor/ne_alarm_state_check_license/ne_alarm_state_check_license.go index 162d6096..eca37261 100644 --- a/src/modules/crontask/processor/ne_alarm_state_check_license/ne_alarm_state_check_license.go +++ b/src/modules/crontask/processor/ne_alarm_state_check_license/ne_alarm_state_check_license.go @@ -120,8 +120,15 @@ func (s *NeAlarmStateCheckLicenseProcessor) Execute(data any) (any, error) { } // 未记录 if alarmStatus == "" { + addInfo := params.AddInfo // keep the incoming AddInfo so it can be restored after alarmNew + if params.AddInfo != "" { + params.AddInfo = params.AddInfo + ", " + err.Error() + } else { + params.AddInfo = err.Error() + } // 进行新增 newAlarm, err := s.alarmNew(neInfo, params) + params.AddInfo = addInfo // restore the additional info if err != nil { result[neTypeAndId] = err.Error() continue @@ -150,7 +157,7 @@ func (s NeAlarmStateCheckLicenseProcessor) serverState(state map[string]any, day } // 计算距离天数 - daysLeft := int64(time.Since(expireTime).Hours() / 24) + daysLeft := int64(time.Until(expireTime).Hours() / 24) // days from now until expireTime, truncated toward zero by the int64 conversion if daysLeft < dayLt { return fmt.Errorf("license will expire in %d days", daysLeft) }