feat: 调度任务添加告警-UDM DB同步检查

This commit is contained in:
TsMask
2025-09-02 17:22:36 +08:00
parent 6c66002653
commit 7661b11f5e
9 changed files with 263 additions and 0 deletions

View File

@@ -0,0 +1,248 @@
package ne_alarm_state_check_udmdb_sync
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"be.ems/src/framework/constants"
"be.ems/src/framework/cron"
"be.ems/src/framework/database/redis"
"be.ems/src/framework/logger"
neDataModel "be.ems/src/modules/network_data/model"
neDataService "be.ems/src/modules/network_data/service"
neModel "be.ems/src/modules/network_element/model"
neService "be.ems/src/modules/network_element/service"
wsService "be.ems/src/modules/ws/service"
)
// NewProcessor is the shared processor instance for the UDM DB sync alarm
// check. Its service fields are wired to the package-level service instances;
// the execution counter starts at its zero value.
var NewProcessor = &NeAlarmStateCheckUDMDBProcessor{
	neConfigBackupService: neService.NewNeConfigBackup,
	neInfoService:         neService.NewNeInfo,
	alarmService:          neDataService.NewAlarm,
	wsSendService:         wsService.NewWSSend,
}
// NeAlarmStateCheckUDMDBProcessor is the scheduled-job processor for the
// "UDM DB sync check" alarm.
type NeAlarmStateCheckUDMDBProcessor struct {
	// neConfigBackupService is the NE configuration backup record service.
	// NOTE(review): not referenced by the methods visible in this file.
	neConfigBackupService *neService.NeConfigBackup
	// neInfoService is the network element information service.
	neInfoService *neService.NeInfo
	// alarmService is the alarm record service.
	alarmService *neDataService.Alarm
	// wsSendService is the WebSocket send service.
	wsSendService *wsService.WSSend
	// count is the number of times Execute has been invoked.
	count int
}
// alarmParams holds the alarm attributes decoded from the job's JSON target
// parameters, plus the alarm ID that is computed per network element.
type alarmParams struct {
	// AlarmTitle, e.g. "NE State Check Alarm UDM DB Sync".
	AlarmTitle string `json:"alarmTitle"`
	// AlarmType, e.g. EquipmentAlarm=2.
	AlarmType string `json:"alarmType"`
	// OrigSeverity, e.g. Major=2.
	OrigSeverity string `json:"origSeverity"`
	// SpecificProblem, e.g. "Alarm Cause: UDM DB Sync Failed".
	SpecificProblem string `json:"specificProblem"`
	// SpecificProblemID, e.g. "AC10300".
	SpecificProblemID string `json:"specificProblemId"`
	// AddInfo is supplementary alarm information.
	AddInfo string `json:"addInfo"`

	// === Fields below are not job parameters ===

	// AlarmId is the alarm ID; it carries no JSON tag and is assigned by the
	// processor before an alarm is raised.
	AlarmId string
}
// Execute runs one pass of the UDM DB sync alarm check.
//
// data must be a cron.JobData whose SysJob.TargetParams is a JSON document
// decodable into alarmParams. Every network element of type "UDM" is checked
// with serverState; for each one that reports an error:
//   - if an alarm with this check's alarm ID is recorded with status "1",
//     it is cleared and a new alarm is raised in its place;
//   - if no alarm with that ID is recorded, a new alarm is raised;
//   - if an alarm is recorded with any other status, nothing is done.
//
// Cleared and new alarms are pushed to the element's WebSocket alarm group.
//
// The returned map holds "count" (number of executions so far) and, per
// failing element, a "<NeType>_<NeId>" key whose value is the last outcome
// ("alarm clear", "alarm new", or an error message). An error is returned
// only when data has the wrong type or the job parameters cannot be decoded.
func (s *NeAlarmStateCheckUDMDBProcessor) Execute(data any) (any, error) {
	s.count++ // one more execution

	options, ok := data.(cron.JobData)
	if !ok {
		return nil, fmt.Errorf("job data type err: %T", data)
	}
	sysJob := options.SysJob
	logger.Infof("重复:%v 任务ID:%s 执行次数:%d", options.Repeat, sysJob.JobID, s.count)

	// Result recorded as the outcome of this execution.
	result := map[string]any{
		"count": s.count,
	}

	// Decode the job parameters.
	var params alarmParams
	if err := json.Unmarshal([]byte(sysJob.TargetParams), &params); err != nil {
		return nil, fmt.Errorf("json params err: %w", err)
	}

	neList := s.neInfoService.SelectList(neModel.NeInfo{NeType: "UDM"}, true, false)
	for _, neInfo := range neList {
		// Healthy elements need no alarm handling.
		stateErr := s.serverState(neInfo)
		if stateErr == nil {
			continue
		}

		neTypeAndId := fmt.Sprintf("%s_%s", neInfo.NeType, neInfo.NeId)
		groupID := fmt.Sprintf("%s_%s_%s", wsService.GROUP_ALARM, neInfo.NeType, neInfo.NeId)

		// Work on a per-element copy so that one element's error text is
		// added exactly once and never leaks into another element's alarm.
		neParams := params
		if params.AddInfo != "" {
			neParams.AddInfo = params.AddInfo + ", " + stateErr.Error()
		} else {
			neParams.AddInfo = stateErr.Error()
		}
		neParams.AlarmId = fmt.Sprintf("%d%d", constants.ALARM_UDM_DB_SYNC, neInfo.CreateTime)

		// Look up an already recorded alarm with this ID for the element.
		recorded := s.alarmService.Find(neDataModel.Alarm{
			NeType:  neInfo.NeType,
			NeId:    neInfo.RmUID,
			AlarmId: neParams.AlarmId,
		})

		// Empty status means "not recorded".
		alarmStatus := ""
		if len(recorded) > 0 {
			alarmStatus = fmt.Sprint(recorded[0].AlarmStatus)
		}

		// An active alarm is cleared first, then re-raised below.
		if alarmStatus == "1" {
			clearAlarm, err := s.alarmClear(neInfo, recorded[0])
			if err != nil {
				result[neTypeAndId] = err.Error()
				continue
			}
			s.wsSendService.ByGroupID(groupID, clearAlarm)
			result[neTypeAndId] = "alarm clear"
			alarmStatus = "" // treat as not recorded so a new alarm is raised
		}

		// A recorded alarm in any other status is left for manual handling.
		if alarmStatus != "" {
			continue
		}

		newAlarm, err := s.alarmNew(neInfo, neParams)
		if err != nil {
			result[neTypeAndId] = err.Error()
			continue
		}
		s.wsSendService.ByGroupID(groupID, newAlarm)
		result[neTypeAndId] = "alarm new"
	}

	return result, nil
}
// serverState checks the replication state of the Redis instance running on
// the given UDM network element.
//
// It obtains a Redis client for the element, registers it under the source
// name "UDM_<NeId>" for the duration of the call, and evaluates the output of
// "INFO replication". It returns nil when the instance is a master or its
// master link is up; otherwise it returns an error describing the configured
// master host and port. Connection and query failures are also returned as
// errors.
func (s NeAlarmStateCheckUDMDBProcessor) serverState(neInfo neModel.NeInfo) error {
	// Redis client of the network element host.
	source := fmt.Sprintf("UDM_%s", neInfo.NeId)
	redisClient, err := neService.NewNeInfo.NeRunRedisClient("UDM", neInfo.NeId)
	if err != nil {
		return err
	}
	defer func() {
		redisClient.Close()
		// presumably unregisters the temporary data source — TODO confirm
		redis.ConnectPush(source, nil)
	}()
	redis.ConnectPush(source, redisClient.Client)

	// Data source registered just above.
	rdb := redis.RDB(source)
	if rdb == nil {
		return fmt.Errorf("redis not client")
	}

	replication, err := rdb.Info(context.Background(), "replication").Result()
	if err != nil {
		// The detailed cause is logged; callers only get a generic message.
		logger.Errorf("redis info replication err:%s", err.Error())
		return fmt.Errorf("redis info replication err")
	}
	return udmdbReplicationState(replication)
}

// udmdbReplicationState evaluates the CRLF-separated text of a Redis
// "INFO replication" reply. It returns nil if a line equals "role:master" or
// "master_link_status:up" (the latter also covers replicas such as
// "role:active-replica"); otherwise it returns a "status down" error carrying
// the master_host and master_port values seen, or empty strings when absent.
func udmdbReplicationState(replication string) error {
	const (
		hostPrefix = "master_host:"
		portPrefix = "master_port:"
	)
	var masterHost, masterPort string
	for line := range strings.SplitSeq(replication, "\r\n") {
		if line == "role:master" || line == "master_link_status:up" {
			return nil
		}
		// Keep everything after the prefix so values containing ':' survive.
		if v, ok := strings.CutPrefix(line, hostPrefix); ok {
			masterHost = v
			continue
		}
		if v, ok := strings.CutPrefix(line, portPrefix); ok {
			masterPort = v
		}
	}
	return fmt.Errorf("host:%s, port:%s, status down", masterHost, masterPort)
}
// alarmClear marks the recorded alarm v as cleared and persists it.
//
// The alarm ID is replaced by "<AlarmCode><EventTime in ms>", the status is
// set to "0", and the clear fields are filled in with clear type "1", user
// "system" and a clear time taken from the network element's UpdateTime.
// It returns the updated alarm when the update affected at least one row,
// otherwise an empty alarm and an error.
func (s NeAlarmStateCheckUDMDBProcessor) alarmClear(neInfo neModel.NeInfo, v neDataModel.Alarm) (neDataModel.Alarm, error) {
	clearedAt := time.UnixMilli(neInfo.UpdateTime)

	// Switch the alarm ID to the clear-alarm ID and flag it as no longer active.
	v.AlarmId = fmt.Sprintf("%s%d", v.AlarmCode, v.EventTime.UnixMilli())
	v.AlarmStatus = "0"

	// Clear details.
	v.ClearType = "1"
	v.ClearTime = &clearedAt
	v.ClearUser = "system"

	if s.alarmService.Update(v) <= 0 {
		return neDataModel.Alarm{}, fmt.Errorf("clear alarm fail")
	}
	return v, nil
}
// alarmNew creates and stores a new active UDM DB sync alarm for neInfo using
// the attributes in v.
//
// The alarm sequence is the element's last sequence plus one. The event time
// is the element's UpdateTime, or the current time when UpdateTime is earlier
// than CreateTime. The alarm code is constants.ALARM_UDM_DB_SYNC, matching the
// prefix of the alarm ID built by Execute.
//
// It returns the stored alarm with its ID set when the insert yields a
// non-empty ID, otherwise an empty alarm and an error.
func (s NeAlarmStateCheckUDMDBProcessor) alarmNew(neInfo neModel.NeInfo, v alarmParams) (neDataModel.Alarm, error) {
	// Last alarm sequence number recorded for this element.
	lastSeq := s.alarmService.FindAlarmSeqLast(neInfo.NeType, neInfo.RmUID)

	// Last update time of the element; fall back to now when it is not
	// later than or equal to the creation time.
	lastTime := neInfo.UpdateTime
	if lastTime < neInfo.CreateTime {
		lastTime = time.Now().UnixMilli()
	}

	alarm := neDataModel.Alarm{
		NeType:            neInfo.NeType,
		NeId:              neInfo.RmUID, // neInfo.NeId,
		NeName:            neInfo.NeName,
		Province:          neInfo.Province,
		PvFlag:            neInfo.PvFlag,
		AlarmSeq:          fmt.Sprint(lastSeq + 1),
		AlarmId:           v.AlarmId,
		AlarmTitle:        v.AlarmTitle,
		AlarmCode:         fmt.Sprint(constants.ALARM_UDM_DB_SYNC),
		EventTime:         time.UnixMilli(lastTime),
		AlarmType:         v.AlarmType,
		OrigSeverity:      v.OrigSeverity,
		PerceivedSeverity: v.OrigSeverity,
		ObjectUid:         neInfo.RmUID,
		ObjectName:        "UDM DB Sync",
		ObjectType:        "db_sync",
		LocationInfo:      "UDM DB Sync Cheack", // NOTE(review): "Cheack" looks like a typo; kept as stored data may rely on it
		AlarmStatus:       "1",                  // active alarm
		SpecificProblem:   v.SpecificProblem,
		SpecificProblemId: v.SpecificProblemID,
		AddInfo:           v.AddInfo,
		Counter:           "0",
		AckState:          "0",
		ClearType:         "0",
	}

	insertId := s.alarmService.InsertAndForword(alarm)
	if insertId == "" {
		return neDataModel.Alarm{}, fmt.Errorf("new alarm fail")
	}
	alarm.ID = insertId
	return alarm, nil
}

View File

@@ -22,6 +22,7 @@ import (
processorNeAlarmStateCheck "be.ems/src/modules/crontask/processor/ne_alarm_state_check"
processorNeAlarmStateCheckCMD "be.ems/src/modules/crontask/processor/ne_alarm_state_check_cmd"
processorNeAlarmStateCheckLicense "be.ems/src/modules/crontask/processor/ne_alarm_state_check_license"
processorNeAlarmStateCheckUDMDB "be.ems/src/modules/crontask/processor/ne_alarm_state_check_udmdb_sync"
processorNeConfigBackup "be.ems/src/modules/crontask/processor/ne_config_backup"
processorNeDataUDM "be.ems/src/modules/crontask/processor/ne_data_udm"
"be.ems/src/modules/crontask/processor/removeFile"
@@ -62,6 +63,8 @@ func InitCronQueue() {
cron.CreateQueue("ne_alarm_state_check_cmd", processorNeAlarmStateCheckCMD.NewProcessor)
// 网元告警-License到期检查
cron.CreateQueue("ne_alarm_state_check_license", processorNeAlarmStateCheckLicense.NewProcessor)
// 网元告警-UDMDB同步检查
cron.CreateQueue("ne_alarm_state_check_udmdb_sync", processorNeAlarmStateCheckUDMDB.NewProcessor)
// 备份-删除备份目录下文件
cron.CreateQueue("backup_remove_file", processorBackupRemoveFile.NewProcessor)