feat: 告警清除/确认/状态检查任务

This commit is contained in:
TsMask
2025-02-26 17:50:59 +08:00
parent 75b1efcc09
commit f583f0bffd
6 changed files with 284 additions and 286 deletions

View File

@@ -1,282 +0,0 @@
package genNeStateAlarm
import (
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"
"be.ems/features/fm"
"be.ems/lib/config"
"be.ems/lib/global"
"be.ems/lib/log"
"be.ems/src/framework/cron"
"be.ems/src/framework/database/db"
"be.ems/src/framework/utils/date"
neDataModel "be.ems/src/modules/network_data/model"
neModel "be.ems/src/modules/network_element/model"
neService "be.ems/src/modules/network_element/service"
"github.com/go-resty/resty/v2"
)
// NewProcessor is the package-level BarProcessor instance.
// The progress and count fields start at their zero values.
var NewProcessor = &BarProcessor{
	neInfoService: neService.NewNeInfo,
}
// BarProcessor is the queue task processor for the "bar" job.
type BarProcessor struct {
	neInfoService *neService.NeInfo // network element (NE) info service
	// task progress
	progress int
	// number of times Execute has been invoked
	count int
}
// BarParams is the alarm definition decoded from the job's TargetParams JSON.
// Its fields are copied into the alarm record that Execute posts.
type BarParams struct {
	AlarmID           string `json:"alarmID"`
	AlarmCode         int64  `json:"alarmCode"`
	AlarmTitle        string `json:"alarmTitle"`
	AlarmType         string `json:"alarmType"`
	OrigSeverity      string `json:"origSeverity"`
	ObjectUID         string `json:"objectUID"`
	ObjectName        string `json:"objectName"`
	ObjectType        string `json:"objectType"`
	// SpecificProblem may contain the placeholder "{threshold}", which Execute
	// replaces with the decimal value of Threshold.
	SpecificProblem   string `json:"specificProblem"`
	SpecificProblemID string `json:"specificProblemID"`
	// NOTE(review): this tag is capitalised ("AddInfo") unlike the other
	// camelCase tags — confirm against the stored job parameters.
	AddInfo           string `json:"AddInfo"`
	// Threshold is compared with the number of seconds elapsed since the
	// NE's latest state timestamp.
	Threshold         int64  `json:"threshold"`
}
// client is the resty HTTP client shared by every request this package sends.
var client = resty.New()

// init applies a 400 ms request timeout to the shared client.
func init() {
	client.SetTimeout(400 * time.Millisecond)
}
// Execute runs one pass of the NE state alarm job.
//
// data must be a cron.JobData whose SysJob.TargetParams holds a JSON-encoded
// BarParams. For every network element returned by the NE info service it
// loads the newest ne_state row and the newest alarm row matching the
// configured alarm ID, then compares the age of the state timestamp (falling
// back to the NE's UpdateTime when no ne_state row exists) with Threshold:
//
//   - age <= Threshold and the latest alarm is not already cleared: a clear
//     alarm (AlarmStatus "0") is posted;
//   - age > Threshold and the latest alarm is not already active: an active
//     alarm (AlarmStatus "1") is posted.
//
// Per-NE failures are logged and skipped so one NE cannot abort the whole run.
//
// It returns a map with the keys succActiveAlarmNum, failActiveAlarmNum,
// succClearAlarmNum and failClearAlarmNum. An error is returned only when data
// has an unexpected type or the job parameters cannot be decoded.
func (s *BarProcessor) Execute(data any) (any, error) {
	s.count++

	options, ok := data.(cron.JobData)
	if !ok {
		// Report a malformed payload as an error instead of panicking.
		return nil, fmt.Errorf("unexpected job data type %T", data)
	}

	var alarmDefine BarParams
	if err := json.Unmarshal([]byte(options.SysJob.TargetParams), &alarmDefine); err != nil {
		log.Error("Failed to Unmarshal:", err)
		return nil, err
	}

	succActiveAlarmNum := 0
	failActiveAlarmNum := 0
	succClearAlarmNum := 0
	failClearAlarmNum := 0

	for _, neInfo := range s.neInfoService.Find(neModel.NeInfo{}, false, false) {
		// NOTE(review): both queries are assembled by string interpolation from
		// NE records and job parameters; consider bound parameters if RawDB
		// supports them.
		query := fmt.Sprintf("select * from ne_state where ne_type='%s' and ne_id='%s' order by `timestamp` desc limit 1", neInfo.NeType, neInfo.NeId)
		log.Debug("SQL:", query)
		neState, err := db.RawDB("", query, nil)
		if err != nil {
			log.Error("Failed to get ne_state:", err)
			continue
		}

		query = fmt.Sprintf("select * from alarm where alarm_id = '%s' and ne_type='%s' and ne_id = '%s' order by event_time desc limit 1",
			alarmDefine.AlarmID, neInfo.NeType, neInfo.RmUID)
		alarm, err := db.RawDB("", query, nil)
		if err != nil {
			log.Error("Failed to get alarm:", err)
			continue
		}

		// Without a state record, fall back to the NE's own update time.
		var timestamp string
		if len(neState) == 0 {
			log.Infof("Not found ne_state neType:%s, neId:%s", neInfo.NeType, neInfo.NeId)
			timestamp = date.ParseDateToStr(neInfo.UpdateTime, date.YYYYMMDDHHMMSS)
		} else {
			timestamp = fmt.Sprint(neState[0]["timestamp"])
		}

		// Seconds elapsed since the timestamp.
		seconds, err := global.GetSecondsSinceDatetime(timestamp)
		if err != nil {
			log.Error("Failed to GetSecondsSinceDatetime:", err)
			continue
		}
		log.Debugf("timestamp:%s seconds:%d", timestamp, seconds)

		if seconds <= alarmDefine.Threshold {
			// State is fresh: clear the alarm unless there is nothing to clear.
			if len(alarm) == 0 || alarm[0]["alarm_status"] == fm.AlarmStatusClearString {
				continue
			}
			if postStateAlarm(neInfo, alarmDefine, "0", timestamp) {
				succClearAlarmNum++
			} else {
				failClearAlarmNum++
			}
			continue
		}

		// State is stale: raise the alarm unless one is already active.
		if len(alarm) > 0 && alarm[0]["alarm_status"] == fm.AlarmStatusActiveString {
			log.Info("System state alarm has exist")
			continue
		}
		if postStateAlarm(neInfo, alarmDefine, "1", timestamp) {
			succActiveAlarmNum++
		} else {
			failActiveAlarmNum++
		}
	}

	// Result recorded as the outcome of this job execution.
	return map[string]any{
		"succActiveAlarmNum": succActiveAlarmNum,
		"failActiveAlarmNum": failActiveAlarmNum,
		"succClearAlarmNum":  succClearAlarmNum,
		"failClearAlarmNum":  failClearAlarmNum,
	}, nil
}

// postStateAlarm builds one alarm record for neInfo from def with the given
// AlarmStatus ("0" in the clear path, "1" in the active path) and POSTs it as a
// single-element JSON array to the OMC fault-management alarms endpoint.
//
// It reports whether the request was delivered and answered with HTTP 200,
// 201, 202 or 204. Encoding and transport failures are logged and reported as
// false.
func postStateAlarm(neInfo neModel.NeInfo, def BarParams, status, timestamp string) bool {
	threshold := strconv.FormatInt(def.Threshold, 10)
	alarms := []neDataModel.Alarm{{
		AlarmSeq:     1,
		AlarmId:      def.AlarmID,
		NeId:         neInfo.RmUID,
		NeType:       neInfo.NeType,
		NeName:       neInfo.NeName,
		Province:     neInfo.Province,
		PvFlag:       neInfo.PvFlag,
		AlarmCode:    def.AlarmCode,
		AlarmTitle:   def.AlarmTitle,
		AlarmType:    def.AlarmType,
		AlarmStatus:  status,
		OrigSeverity: def.OrigSeverity,
		ObjectUid:    def.ObjectUID,
		ObjectName:   def.ObjectName,
		ObjectType:   def.ObjectType,
		// NOTE(review): the format has no separator between the timestamp and
		// "threshold=" — kept as-is; confirm whether consumers rely on it.
		LocationInfo:      fmt.Sprintf("SystemManagement.State: NE heartbeat timestamp=%sthreshold=%v", timestamp, def.Threshold),
		SpecificProblem:   strings.ReplaceAll(def.SpecificProblem, "{threshold}", threshold),
		SpecificProblemId: def.SpecificProblemID,
		AddInfo:           def.AddInfo,
		EventTime:         time.Now().UnixMilli(),
	}}

	body, err := json.Marshal(alarms)
	if err != nil {
		log.Error("Failed to Marshal:", err)
		return false
	}

	requestURI := fmt.Sprintf("/api/rest/faultManagement/v1/elementType/%s/objectType/alarms", neInfo.NeType)
	requestURL := fmt.Sprintf("%s%s", config.GetOMCHostUrl(), requestURI)
	log.Debug("requestURL: POST ", requestURL)

	response, err := client.R().
		EnableTrace().
		SetHeaders(map[string]string{"User-Agent": config.GetDefaultUserAgent()}).
		SetHeaders(map[string]string{"Content-Type": "application/json;charset=UTF-8"}).
		SetBody(body).
		Post(requestURL)
	if err != nil {
		log.Error("Failed to post:", err)
		return false
	}

	log.Debug("StatusCode: ", response.StatusCode())
	log.Debug("response body:", string(response.Body()))
	switch response.StatusCode() {
	case http.StatusOK, http.StatusCreated, http.StatusNoContent, http.StatusAccepted:
		return true
	default:
		return false
	}
}

View File

@@ -0,0 +1,169 @@
package ne_alarm_state_check
import (
"encoding/json"
"fmt"
"time"
"be.ems/src/framework/constants"
"be.ems/src/framework/cron"
"be.ems/src/framework/logger"
"be.ems/src/framework/utils/parse"
neDataModel "be.ems/src/modules/network_data/model"
neDataService "be.ems/src/modules/network_data/service"
neModel "be.ems/src/modules/network_element/model"
neService "be.ems/src/modules/network_element/service"
wsService "be.ems/src/modules/ws/service"
)
// NewProcessor is the package-level NeAlarmStateCheckProcessor instance.
// The execution counter starts at its zero value.
var NewProcessor = &NeAlarmStateCheckProcessor{
	wsSendService:         wsService.NewWSSend,
	alarmService:          neDataService.NewAlarm,
	neInfoService:         neService.NewNeInfo,
	neConfigBackupService: neService.NewNeConfigBackup,
}
// NeAlarmStateCheckProcessor is the cron processor that checks network element
// (NE) state and raises or clears the corresponding state-check alarm.
type NeAlarmStateCheckProcessor struct {
	neConfigBackupService *neService.NeConfigBackup // NE configuration backup record service
	neInfoService         *neService.NeInfo         // NE info service
	alarmService          *neDataService.Alarm      // alarm info service
	wsSendService         *wsService.WSSend         // WebSocket send service
	count                 int                       // number of executions
}
// alarmParams holds the alarm parameters decoded from the job's JSON
// configuration, plus one working field that is filled in by the processor.
type alarmParams struct {
	AlarmTitle        string `json:"alarmTitle"`        // e.g. NE State Check Alarm
	AlarmType         string `json:"alarmType"`         // e.g. EquipmentAlarm=2
	OrigSeverity      string `json:"origSeverity"`      // e.g. Major=2
	SpecificProblem   string `json:"specificProblem"`   // alarm cause: the system state of target NE has not been received
	SpecificProblemID string `json:"specificProblemId"` // e.g. AC10000
	AddInfo           string `json:"addInfo"`

	// === non-parameter fields ===

	// AlarmId is the alarm ID computed per network element. It is tagged "-"
	// so a stray "alarmId" key in the job parameters cannot populate it
	// (encoding/json matches untagged field names case-insensitively).
	AlarmId string `json:"-"`
}
// Execute runs one pass of the NE alarm state check.
//
// data must be a cron.JobData whose SysJob.TargetParams holds JSON-encoded
// alarm parameters. For every network element returned by the NE info service:
//
//   - if the NE is online and its state-check alarm has status "1", the alarm
//     is cleared;
//   - if the NE is not online and no state-check alarm exists, a new alarm is
//     created.
//
// Each cleared or created alarm is pushed to the NE's WebSocket alarm group.
//
// The returned map contains "count" (number of executions so far) and, per
// affected NE keyed by "<neType>_<neId>", either "alarm clear", "alarm new" or
// the error text of the failed operation. An error is returned only when data
// has an unexpected type or the parameters cannot be decoded.
func (s *NeAlarmStateCheckProcessor) Execute(data any) (any, error) {
	s.count++ // one more execution

	options, ok := data.(cron.JobData)
	if !ok {
		// Report a malformed payload as an error instead of panicking.
		return nil, fmt.Errorf("unexpected job data type %T", data)
	}
	sysJob := options.SysJob
	logger.Infof("重复 %v 任务ID %d", options.Repeat, sysJob.JobId)

	// Result recorded as the outcome of this job execution.
	result := map[string]any{
		"count": s.count,
	}

	// Decode the job parameters.
	var params alarmParams
	if err := json.Unmarshal([]byte(sysJob.TargetParams), &params); err != nil {
		return nil, fmt.Errorf("json params err: %w", err)
	}

	for _, neInfo := range s.neInfoService.Find(neModel.NeInfo{}, true, false) {
		neTypeAndId := fmt.Sprintf("%s_%s", neInfo.NeType, neInfo.NeId)
		isOnline := parse.Boolean(neInfo.ServerState["online"])

		// The alarm ID is derived from the alarm code and the NE creation time.
		params.AlarmId = fmt.Sprintf("%d%d", constants.ALARM_STATE_CHECK, neInfo.CreateTime)

		// Look up an existing alarm with that ID for this NE.
		existing := s.alarmService.Find(neDataModel.Alarm{
			NeType:  neInfo.NeType,
			NeId:    neInfo.NeId,
			AlarmId: params.AlarmId,
		})
		alarmStatus := ""
		if len(existing) > 0 {
			alarmStatus = fmt.Sprint(existing[0].AlarmStatus)
		}

		var (
			changed neDataModel.Alarm
			err     error
			outcome string
		)
		switch {
		case isOnline && alarmStatus == "1":
			// Online with an active alarm: clear it.
			changed, err = s.alarmClear(neInfo, existing[0])
			outcome = "alarm clear"
		case !isOnline && alarmStatus == "":
			// Offline with no alarm on record: raise one.
			changed, err = s.alarmNew(neInfo, params)
			outcome = "alarm new"
		default:
			continue
		}
		if err != nil {
			result[neTypeAndId] = err.Error()
			continue
		}

		groupID := fmt.Sprintf("%s_%s_%s", wsService.GROUP_ALARM, neInfo.NeType, neInfo.NeId)
		s.wsSendService.ByGroupID(groupID, changed)
		result[neTypeAndId] = outcome
	}

	return result, nil
}
// alarmClear marks the alarm v as cleared by the system and persists it.
//
// The alarm ID is rewritten to "<AlarmCode><EventTime>", the status is set to
// "0", and the clear type (1), clear time (the NE's UpdateTime) and clear user
// ("system") are filled in. It returns the updated alarm, or an error when the
// update affected no rows.
//
// The receiver is a pointer for consistency with Execute.
func (s *NeAlarmStateCheckProcessor) alarmClear(neInfo neModel.NeInfo, v neDataModel.Alarm) (neDataModel.Alarm, error) {
	// Switch the alarm ID to the cleared-alarm ID.
	v.AlarmId = fmt.Sprintf("%d%d", v.AlarmCode, v.EventTime)
	v.AlarmStatus = "0"

	// Clear metadata.
	v.ClearType = 1
	v.ClearTime = neInfo.UpdateTime
	v.ClearUser = "system"

	if rows := s.alarmService.Update(v); rows <= 0 {
		return neDataModel.Alarm{}, fmt.Errorf("clear alarm fail")
	}
	return v, nil
}
// alarmNew creates and stores a new active state-check alarm for neInfo using
// the parameters in v.
//
// The alarm sequence is the NE's last sequence plus one. The event time is the
// NE's UpdateTime, or the current time when UpdateTime is earlier than
// CreateTime. It returns the stored alarm with its ID set, or an error when the
// insert did not yield a positive ID.
//
// The receiver is a pointer for consistency with Execute.
func (s *NeAlarmStateCheckProcessor) alarmNew(neInfo neModel.NeInfo, v alarmParams) (neDataModel.Alarm, error) {
	// Last alarm sequence number for this NE.
	lastSeq := s.alarmService.FindAlarmSeqLast(neInfo.NeType, neInfo.NeId)

	// Last update time of the NE; fall back to now when it precedes creation.
	lastTime := neInfo.UpdateTime
	if lastTime < neInfo.CreateTime {
		lastTime = time.Now().UnixMilli()
	}

	alarm := neDataModel.Alarm{
		NeType:            neInfo.NeType,
		NeId:              neInfo.NeId,
		NeName:            neInfo.NeName,
		Province:          neInfo.Province,
		PvFlag:            neInfo.PvFlag,
		AlarmSeq:          lastSeq + 1,
		AlarmId:           v.AlarmId,
		AlarmTitle:        v.AlarmTitle,
		AlarmCode:         constants.ALARM_STATE_CHECK,
		EventTime:         lastTime,
		AlarmType:         "2",
		OrigSeverity:      "2",
		PerceivedSeverity: "2",
		ObjectUid:         neInfo.RmUID,
		ObjectName:        "SystemManagement;Heartbeat",
		ObjectType:        "SystemState",
		LocationInfo:      "SystemManagement.State: NE Heartbeat",
		AlarmStatus:       "1",
		SpecificProblem:   v.SpecificProblem,
		SpecificProblemId: v.SpecificProblemID,
		AddInfo:           v.AddInfo,
	}

	insertId := s.alarmService.Insert(alarm)
	if insertId <= 0 {
		return neDataModel.Alarm{}, fmt.Errorf("new alarm fail")
	}
	alarm.ID = insertId
	return alarm, nil
}

View File

@@ -6,9 +6,9 @@ import (
"be.ems/src/modules/crontask/processor/delExpiredNeBackup"
"be.ems/src/modules/crontask/processor/deleteExpiredRecord"
"be.ems/src/modules/crontask/processor/exportTable"
"be.ems/src/modules/crontask/processor/genNeStateAlarm"
"be.ems/src/modules/crontask/processor/getStateFromNE"
processorMonitorSysResource "be.ems/src/modules/crontask/processor/monitor_sys_resource"
processorNeAlarmStateCheck "be.ems/src/modules/crontask/processor/ne_alarm_state_check"
processorNeConfigBackup "be.ems/src/modules/crontask/processor/ne_config_backup"
processorNeDataUDM "be.ems/src/modules/crontask/processor/ne_data_udm"
"be.ems/src/modules/crontask/processor/removeFile"
@@ -22,12 +22,13 @@ func InitCronQueue() {
cron.CreateQueue("ne_config_backup", processorNeConfigBackup.NewProcessor)
// 网元数据-UDM数据刷新同步
cron.CreateQueue("ne_data_udm", processorNeDataUDM.NewProcessor)
// 网元告警-状态检查
cron.CreateQueue("ne_alarm_state_check", processorNeAlarmStateCheck.NewProcessor)
// delete expired NE backup file
cron.CreateQueue("delExpiredNeBackup", delExpiredNeBackup.NewProcessor)
cron.CreateQueue("deleteExpiredRecord", deleteExpiredRecord.NewProcessor)
cron.CreateQueue("backupEtcFromNE", backupEtcFromNE.NewProcessor)
cron.CreateQueue("getStateFromNE", getStateFromNE.NewProcessor)
cron.CreateQueue("genNeStateAlarm", genNeStateAlarm.NewProcessor)
cron.CreateQueue("exportTable", exportTable.NewProcessor)
cron.CreateQueue("removeFile", removeFile.NewProcessor)
}

View File

@@ -52,7 +52,7 @@ type AlarmController struct {
// @Summary Alarm List
// @Description Alarm List
// @Router /neData/alarm/list [get]
func (s *AlarmController) List(c *gin.Context) {
func (s AlarmController) List(c *gin.Context) {
var query model.AlarmQuery
if err := c.ShouldBindQuery(&query); err != nil {
errMsgs := fmt.Sprintf("bind err: %s", resp.FormatBindError(err))
@@ -67,7 +67,7 @@ func (s *AlarmController) List(c *gin.Context) {
// 告警删除
//
// DELETE /:id
func (s *AlarmController) Remove(c *gin.Context) {
func (s AlarmController) Remove(c *gin.Context) {
language := reqctx.AcceptLanguage(c)
id := c.Param("id")
if id == "" {
@@ -91,3 +91,48 @@ func (s *AlarmController) Remove(c *gin.Context) {
msg := i18n.TTemplate(language, "app.common.deleteSuccess", map[string]any{"num": rows})
c.JSON(200, resp.OkMsg(msg))
}
// Clear clears the alarms whose record IDs are listed in the request body and
// responds with the value returned by the alarm service.
//
// PUT /clear
func (s AlarmController) Clear(c *gin.Context) {
	var req struct {
		Ids []int64 `json:"ids" binding:"required"`
	}
	if bindErr := c.ShouldBindBodyWithJSON(&req); bindErr != nil {
		msg := fmt.Sprintf("bind err: %s", resp.FormatBindError(bindErr))
		c.JSON(422, resp.CodeMsg(40422, msg))
		return
	}

	// The logged-in user is recorded as the clearing user.
	rows, err := s.alarmService.AlarmClearByIds(req.Ids, reqctx.LoginUserToUserName(c))
	if err != nil {
		c.JSON(200, resp.ErrMsg(err.Error()))
		return
	}
	c.JSON(200, resp.OkData(rows))
}
// Ack sets the acknowledgement state of the alarms whose record IDs are listed
// in the request body and responds with the value returned by the alarm
// service.
//
// PUT /ack
func (s AlarmController) Ack(c *gin.Context) {
	var req struct {
		Ids      []int64 `json:"ids" binding:"required"`
		AckState bool    `json:"ackState" binding:"omitempty"`
	}
	if bindErr := c.ShouldBindBodyWithJSON(&req); bindErr != nil {
		msg := fmt.Sprintf("bind err: %s", resp.FormatBindError(bindErr))
		c.JSON(422, resp.CodeMsg(40422, msg))
		return
	}

	// The logged-in user is recorded as the acknowledging user.
	rows, err := s.alarmService.AlarmAckByIds(req.Ids, reqctx.LoginUserToUserName(c), req.AckState)
	if err != nil {
		c.JSON(200, resp.ErrMsg(err.Error()))
		return
	}
	c.JSON(200, resp.OkData(rows))
}

View File

@@ -40,6 +40,14 @@ func Setup(router *gin.Engine) {
middleware.PreAuthorize(nil),
controller.NewAlarm.Remove,
)
alarmGroup.PUT("/clear",
middleware.PreAuthorize(nil),
controller.NewAlarm.Clear,
)
alarmGroup.PUT("/ack",
middleware.PreAuthorize(nil),
controller.NewAlarm.Ack,
)
}
// 基站状态历史记录信息 含AMF/MME

View File

@@ -2,7 +2,9 @@ package service
import (
"fmt"
"time"
"be.ems/src/framework/constants"
"be.ems/src/modules/network_data/model"
"be.ems/src/modules/network_data/repository"
)
@@ -57,3 +59,58 @@ func (r Alarm) DeleteByIds(ids []int64) (int64, error) {
// FindAlarmSeqLast returns what the alarm repository reports as the last alarm
// sequence for the given NE type and NE ID.
func (r Alarm) FindAlarmSeqLast(neType, neId string) int64 {
	lastSeq := r.alarmRepository.SelectAlarmSeqLast(neType, neId)
	return lastSeq
}
// AlarmClearByIds clears the alarms with the given record IDs in bulk.
//
// It fails with "no data" when none of the IDs exist and with "clear fail" when
// the number of records found differs from the number of IDs supplied.
// Otherwise every record is marked cleared (status "0", clear type 2, current
// time, clearUser) and the summed result of the repository updates is returned.
func (r Alarm) AlarmClearByIds(ids []int64, clearUser string) (int64, error) {
	// Make sure the records exist.
	found := r.alarmRepository.SelectByIds(ids)
	if len(found) <= 0 {
		return 0, fmt.Errorf("no data")
	}
	if len(found) != len(ids) {
		// Clearing failed.
		return 0, fmt.Errorf("clear fail")
	}

	var total int64
	for _, item := range found {
		// State-check alarms get their alarm ID rewritten on clear.
		if item.AlarmCode == constants.ALARM_STATE_CHECK {
			item.AlarmId = fmt.Sprintf("%d%d", item.AlarmCode, item.EventTime)
		}
		item.AlarmStatus = "0"
		// Clear metadata.
		item.ClearType = 2
		item.ClearTime = time.Now().UnixMilli()
		item.ClearUser = clearUser
		total += r.alarmRepository.Update(item)
	}
	return total, nil
}
// AlarmAckByIds sets the acknowledgement state of the alarms with the given
// record IDs in bulk.
//
// It fails with "no data" when none of the IDs exist and with "ack fail" when
// the number of records found differs from the number of IDs supplied.
// Otherwise every record gets AckState 1 (ackState true) or 0 (ackState false),
// the current time as AckTime and ackUser as AckUser, and the summed result of
// the repository updates is returned.
func (r Alarm) AlarmAckByIds(ids []int64, ackUser string, ackState bool) (int64, error) {
	// Make sure the records exist.
	found := r.alarmRepository.SelectByIds(ids)
	if len(found) <= 0 {
		return 0, fmt.Errorf("no data")
	}
	if len(found) != len(ids) {
		// Acknowledgement failed.
		return 0, fmt.Errorf("ack fail")
	}

	var total int64
	for _, item := range found {
		// Acknowledged or not.
		item.AckState = 0
		if ackState {
			item.AckState = 1
		}
		item.AckTime = time.Now().UnixMilli()
		item.AckUser = ackUser
		total += r.alarmRepository.Update(item)
	}
	return total, nil
}