feat: 优化网元状态告警处理逻辑,增加状态更新时间戳

This commit is contained in:
TsMask
2025-02-25 14:52:53 +08:00
parent 88d6f36a18
commit dcf513bcfd
2 changed files with 53 additions and 51 deletions

View File

@@ -10,20 +10,28 @@ import (
"be.ems/features/fm"
"be.ems/lib/config"
"be.ems/lib/dborm"
"be.ems/lib/global"
"be.ems/lib/log"
"be.ems/src/framework/cron"
"be.ems/src/framework/datasource"
"be.ems/src/framework/utils/date"
"be.ems/src/framework/utils/parse"
"github.com/go-resty/resty/v2"
neModel "be.ems/src/modules/network_element/model"
neService "be.ems/src/modules/network_element/service"
)
// NewProcessor is the package-level BarProcessor instance. It is wired with
// the NE config-backup service and the NE info service, and both of its
// counters start at zero.
//
// Every field is named exactly once: the Go compiler rejects a struct
// literal that repeats a field name.
var NewProcessor = &BarProcessor{
	neConfigBackupService: neService.NewNeConfigBackup,
	neInfoService:         neService.NewNeInfo,
	progress:              0,
	count:                 0,
}
// bar 队列任务处理
type BarProcessor struct {
neConfigBackupService *neService.NeConfigBackup // 网元配置文件备份记录服务
neInfoService *neService.NeInfo // 网元信息服务
// 任务进度
progress int
// 执行次数
@@ -97,32 +105,26 @@ func (s *BarProcessor) Execute(data any) (any, error) {
return nil, err
}
var nes []dborm.NeInfo
_, err = dborm.XormGetAllNeInfo(&nes)
if err != nil {
log.Error("Failed to get all ne info:", err)
return nil, err
}
succActiveAlarmNum := 0
failActiveAlarmNum := 0
succClearAlarmNum := 0
failClearAlarmNum := 0
for _, ne := range nes {
neList := s.neInfoService.SelectList(neModel.NeInfo{}, true, false)
for _, ne := range neList {
//log.Debug("ne:", ne)
sql := fmt.Sprintf("select * from ne_state where ne_type='%s' and ne_id='%s' order by `timestamp` desc limit 1", ne.NeType, ne.NeId)
log.Debug("SQL:", sql)
neState, err := dborm.XormGetDataBySQL(sql)
if err != nil {
log.Error("Failed to get ne_state:", err)
continue
}
if len(*neState) == 0 {
log.Warn("Not found record in ne_state:")
//continue
}
// sql := fmt.Sprintf("select * from ne_state where ne_type='%s' and ne_id='%s' order by `timestamp` desc limit 1", ne.NeType, ne.NeId)
// log.Debug("SQL:", sql)
// neState, err := dborm.XormGetDataBySQL(sql)
// if err != nil {
// log.Error("Failed to get ne_state:", err)
// continue
// }
// if len(*neState) == 0 {
// log.Warn("Not found record in ne_state:")
// //continue
// }
//log.Debug("neState:", *neState)
// params := "10000"
@@ -138,33 +140,31 @@ func (s *BarProcessor) Execute(data any) (any, error) {
// log.Debug("alarmDefine:", alarmDefine)
sql = fmt.Sprintf("select * from alarm where alarm_id = '%s' and ne_type='%s' and ne_id = '%s' order by event_time desc limit 1",
// 是否存在告警
sql := fmt.Sprintf("select * from alarm where alarm_id = '%s' and ne_type='%s' and ne_id = '%s' order by event_time desc limit 1",
alarmDefine.AlarmID, ne.NeType, ne.RmUID)
alarm, err := dborm.XormGetDataBySQL(sql)
alarm, err := datasource.RawDB("", sql, nil)
if err != nil {
log.Error("Failed to get alarm:", err)
continue
}
//log.Debug("alarm:", *alarm)
var timestamp string
if len(*neState) == 0 {
log.Infof("Not found ne_state neType:%s, neId:%s", ne.NeType, ne.NeId)
timestamp = ne.UpdateTime.Format(time.DateTime)
} else {
timestamp = (*neState)[0]["timestamp"]
alarmStatus := "0"
if len(alarm) > 0 {
alarmStatus = fmt.Sprint(alarm[0]["alarm_status"])
}
// 解析日期时间字符串为时间对象
seconds, err := global.GetSecondsSinceDatetime(timestamp)
if err != nil {
log.Error("Failed to GetSecondsSinceDatetime:", err)
continue
changeTime := time.UnixMilli(ne.UpdateTime)
// 检查状态
isOnline := false
if ne.ServerState != nil {
isOnline = parse.Boolean(ne.ServerState["online"])
}
log.Debugf("timestamp:%s seconds:%d", timestamp, seconds)
if seconds <= alarmDefine.Threshold {
if len(*alarm) == 0 || (*alarm)[0]["alarm_status"] == fm.AlarmStatusClearString {
// 在线且状态为活动告警
if isOnline && alarmStatus == "1" {
if len(alarm) == 0 {
continue
}
if alarmStatus == "0" {
continue
}
@@ -172,7 +172,8 @@ func (s *BarProcessor) Execute(data any) (any, error) {
var alarmSeq int = 1
threshold := strconv.FormatInt(alarmDefine.Threshold, 10)
SpecificProblem := strings.ReplaceAll(alarmDefine.SpecificProblem, "{threshold}", threshold)
locationInfo := fmt.Sprintf("SystemManagement.State: NE heartbeat timestamp=%sthreshold=%v", timestamp, alarmDefine.Threshold)
timeStr := date.ParseDateToStr(changeTime, date.YYYY_MM_DDTHH_MM_SSZ)
locationInfo := fmt.Sprintf("SystemManagement.State: NE heartbeat timestamp=%sthreshold=%v", timeStr, alarmDefine.Threshold)
alarmData := &Alarm{
AlarmSeq: alarmSeq,
AlarmId: alarmDefine.AlarmID,
@@ -232,16 +233,15 @@ func (s *BarProcessor) Execute(data any) (any, error) {
_ = json.Unmarshal(response.Body(), &body)
failClearAlarmNum++
}
} else {
var alarmSeq int = 1
if len(*alarm) > 0 && (*alarm)[0]["alarm_status"] == fm.AlarmStatusActiveString {
log.Info("System state alarm has exist")
continue
}
}
// 不在线且状态为清除告警
if !isOnline && alarmStatus == "0" {
var alarmSeq int = 1
threshold := strconv.FormatInt(alarmDefine.Threshold, 10)
SpecificProblem := strings.ReplaceAll(alarmDefine.SpecificProblem, "{threshold}", threshold)
locationInfo := fmt.Sprintf("SystemManagement.State: NE heartbeat timestamp=%sthreshold=%v", timestamp, alarmDefine.Threshold)
timeStr := date.ParseDateToStr(changeTime, date.YYYY_MM_DDTHH_MM_SSZ)
locationInfo := fmt.Sprintf("SystemManagement.State: NE heartbeat timestamp=%s threshold=%v", timeStr, alarmDefine.Threshold)
alarmData := &Alarm{
AlarmSeq: alarmSeq,
AlarmId: alarmDefine.AlarmID,

View File

@@ -208,8 +208,10 @@ func (r *NeInfo) UpdateState(id, status string) int64 {
tx := datasource.DB("").Model(&model.NeInfo{})
// 构建查询条件
tx = tx.Where("id = ?", id)
tx.UpdateColumn("status", status)
tx.UpdateColumn("update_time", time.Now().UnixMilli())
// 执行更新
if err := tx.UpdateColumn("status", status).Error; err != nil {
if err := tx.Error; err != nil {
logger.Errorf("update err => %v", err.Error())
return 0
}