Compare commits


17 Commits

43 changed files (change count per file in parentheses):

  1. et_Info/InfoHandler.go (24)
  2. et_analyze/aggThreshold.go (2)
  3. et_analyze/threshold.go (19)
  4. et_cache/cacheHandler.go (29)
  5. et_calc/dataCalc.go (29)
  6. et_calc/group/groupCalc.go (36)
  7. et_print/printHandler.go (9)
  8. et_push/pushHandler.go (52)
  9. et_rpc/go.mod (15)
  10. et_rpc/go.sum (32)
  11. et_rpc/pb/et_rpc.pb.go (1147)
  12. et_rpc/pb/et_rpc_grpc.pb.go (349)
  13. et_rpc/proto/et_rpc.proto (124)
  14. et_rpc/proto/go-protobuf生成帮助.txt (58)
  15. et_rpc/rpc.go (22)
  16. et_rpc/rpc_test.go (57)
  17. et_sink/sinkHandler.go (161)
  18. master/app/app.go (25)
  19. master/app/et_master.go (635)
  20. master/app/master_rpc_service.go (137)
  21. master/data_source/data_agg_handler.go (36)
  22. master/data_source/data_agg_handler_test.go (4)
  23. master/data_source/data_raw_handler.go (49)
  24. master/data_source/kafka_consumerGroup_aggHandler.go (317)
  25. master/data_source/kafka_consumerGroup_iotaHandler.go (329)
  26. master/data_source/kafka_dataSource.go (278)
  27. master/data_source/kafka_producer.go (67)
  28. master/node_manager/connection_pool_grpc.go (112)
  29. master/node_manager/load_balancer.go (128)
  30. master/node_manager/node_connection_grpc.go (186)
  31. master/node_manager/node_manager.go (58)
  32. master/node_manager/node_selector.go (56)
  33. node/agg_worker/agg_node.go (85)
  34. node/app/app.go (108)
  35. node/app/et_node.go (445)
  36. node/app/master_connection_grpc.go (242)
  37. node/app/master_connection_pool_grpc.go (112)
  38. node/app/node_process_stage.go (68)
  39. node/app/node_server.go (455)
  40. node/et_worker/et_recv/recvDataHanler.go (75)
  41. node/stages/stage.go (43)
  42. node/stages/stageManage.go (49)
  43. node/stages/stage_test.go (80)

et_Info/InfoHandler.go (24 changes)

@ -3,9 +3,9 @@ package et_Info
import (
"gitea.anxinyun.cn/container/common_models"
"gitea.anxinyun.cn/container/common_utils"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"log"
"node/stages"
"sync"
)
type InfoHandler struct {
@ -13,13 +13,13 @@ type InfoHandler struct {
stage *stages.Stage
}
func NewInfoHandler() *InfoHandler {
redisAddr := configLoad.LoadConfig().GetString("redis.address")
func NewInfoHandler(configHelper *common_utils.ConfigHelper) *InfoHandler {
//redisAddr := configLoad.LoadConfig().GetString("redis.address")
the := &InfoHandler{
configHelper: common_utils.NewConfigHelper(redisAddr),
configHelper: configHelper, //common_utils.NewConfigHelper(redisAddr),
stage: stages.NewStage("测点信息获取"),
}
the.stage.AddProcess(the.getStationInfo)
the.stage.AddProcess(the.getStationInfos)
return the
}
@ -27,8 +27,20 @@ func (the *InfoHandler) GetStage() stages.Stage {
return *the.stage
}
func (the *InfoHandler) getStationInfo(p *common_models.ProcessData) *common_models.ProcessData {
func (the *InfoHandler) getStationInfos(data []*common_models.ProcessData) []*common_models.ProcessData {
var wg sync.WaitGroup // 初始化 WaitGroup
for _, processData := range data {
wg.Add(1)
go func(pd *common_models.ProcessData) {
defer wg.Done()
the.getStationInfo(pd)
}(processData)
}
wg.Wait()
return data
}
func (the *InfoHandler) getStationInfo(p *common_models.ProcessData) *common_models.ProcessData {
s, err := the.configHelper.GetDeviceStationObjs(p.DeviceData.DeviceId)
if err == nil && s != nil {
p.Stations = s

et_analyze/aggThreshold.go (2 changes)

@ -64,7 +64,7 @@ func (t *AggThresholdHandler) ProcessData(aggData *common_models.AggData) error
}
// 发布 kafka 消息
fmt.Printf("测点[%d-%s][kafka-topic:%s]%s\n", stationInfo.Id, stationInfo.Name, t.alarmTopic, jsonData)
log.Printf("测点[%d-%s][kafka-topic:%s]%s\n", stationInfo.Id, stationInfo.Name, t.alarmTopic, jsonData)
t.kafkaAsyncProducer.Publish(t.alarmTopic, jsonData)
return nil

et_analyze/threshold.go (19 changes)

@ -10,6 +10,7 @@ import (
"log"
"node/stages"
"strconv"
"sync"
)
type ThresholdHandler struct {
@ -34,7 +35,7 @@ func NewThresholdHandler() *ThresholdHandler {
kafkaAlarmTopic: alarmTopic,
}
model.stage.AddProcess(model.processData)
model.stage.AddProcess(model.processDatas)
return model
}
@ -43,6 +44,20 @@ func (t *ThresholdHandler) GetStage() stages.Stage {
}
// 必须
func (t *ThresholdHandler) processDatas(data []*common_models.ProcessData) []*common_models.ProcessData {
go func() {
var wg sync.WaitGroup // 初始化 WaitGroup
for _, processData := range data {
wg.Add(1)
go func(pd *common_models.ProcessData) {
defer wg.Done()
t.processData(pd)
}(processData)
}
wg.Wait()
}()
return data
}
func (t *ThresholdHandler) processData(resultData *common_models.ProcessData) *common_models.ProcessData {
if resultData == nil || resultData.Stations == nil || len(resultData.Stations) == 0 {
return resultData
@ -74,7 +89,7 @@ func (t *ThresholdHandler) processData(resultData *common_models.ProcessData) *c
func (t *ThresholdHandler) judgeThreshold(station *common_models.Station) *common_models.AlarmMsg {
// 检查测点是否有阈值配置信息
if station.Threshold == nil || station.Threshold.Items == nil {
log.Printf("测点[%d-%s]未配置阈值,无须进行阈值判断\n", station.Info.Id, station.Info.Name)
log.Printf("测点[%d-%s]未配置阈值,跳过\n", station.Info.Id, station.Info.Name)
return nil
}

et_cache/cacheHandler.go (29 changes)

@ -5,10 +5,9 @@ import (
"fmt"
"gitea.anxinyun.cn/container/common_models"
"gitea.anxinyun.cn/container/common_models/constant/redisKey"
"gitea.anxinyun.cn/container/common_utils"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"log"
"node/stages"
"sync"
)
type CacheHandler struct {
@ -16,19 +15,33 @@ type CacheHandler struct {
stage *stages.Stage
}
func NewCacheHandler() *CacheHandler {
redisAddr := configLoad.LoadConfig().GetString("redis.address")
configHelper := common_utils.NewConfigHelper(redisAddr)
func NewCacheHandler(cacheServer *cacheSer.CacheServer) *CacheHandler {
//redisAddr := configLoad.LoadConfig().GetString("redis.address")
//configHelper := common_utils.NewConfigHelper(redisAddr)
the := &CacheHandler{
stage: stages.NewStage("测点数据缓存"),
cacheServer: cacheSer.NewCacheServer(configHelper),
stage: stages.NewStage("滑窗过滤"),
cacheServer: cacheServer, //cacheSer.NewCacheServer(configHelper),
}
the.stage.AddProcess(the.enqueue)
the.stage.AddProcess(the.enqueueForStations)
return the
}
func (the *CacheHandler) GetStage() stages.Stage {
return *the.stage
}
func (the *CacheHandler) enqueueForStations(data []*common_models.ProcessData) []*common_models.ProcessData {
var wg sync.WaitGroup // 初始化 WaitGroup
for _, processData := range data {
wg.Add(1)
go func(pd *common_models.ProcessData) {
defer wg.Done()
the.enqueue(pd)
}(processData)
}
wg.Wait()
return data
}
func (the *CacheHandler) enqueue(p *common_models.ProcessData) *common_models.ProcessData {
for _, station := range p.Stations {

et_calc/dataCalc.go (29 changes)

@ -17,6 +17,7 @@ import (
"node/stages"
"sort"
"strings"
"sync"
)
type CalcHandler struct {
@ -26,25 +27,37 @@ type CalcHandler struct {
stage *stages.Stage
}
func NewCalcHandler() *CalcHandler {
redisAddr := configLoad.LoadConfig().GetString("redis.address")
esAddresses := configLoad.LoadConfig().GetStringSlice("es.addresses")
configHp := common_utils.NewConfigHelper(redisAddr)
func NewCalcHandler(configHelper *common_utils.ConfigHelper, cacheServer *cacheSer.CacheServer, esESHelper *dbHelper.ESHelper) *CalcHandler {
the := &CalcHandler{
cacheServer: cacheSer.NewCacheServer(configHp),
unitHelper: common_utils.NewUnitHelper(),
esESHelper: dbHelper.NewESHelper(esAddresses, "", ""),
cacheServer: cacheServer,
esESHelper: esESHelper,
unitHelper: common_utils.NewUnitHelper(configHelper),
stage: stages.NewStage("单测点计算"),
}
the.stage.AddProcess(the.calcFormula)
the.stage.AddProcess(the.calcFormulaForStations)
return the
}
func (the *CalcHandler) GetStage() stages.Stage {
return *the.stage
}
func (the *CalcHandler) calcFormulaForStations(data []*common_models.ProcessData) []*common_models.ProcessData {
var wg sync.WaitGroup // 初始化 WaitGroup
for _, processData := range data {
wg.Add(1)
go func(pd *common_models.ProcessData) {
defer wg.Done()
the.calcFormula(pd)
}(processData)
}
wg.Wait()
return data
}
// 单设备测点
func (the *CalcHandler) calcFormula(p *common_models.ProcessData) *common_models.ProcessData {
for i := range p.Stations {
for _, device := range p.Stations[i].Info.Devices {
//计算结果

et_calc/group/groupCalc.go (36 changes)

@ -62,16 +62,16 @@ type GroupCalc struct {
esHandler *et_sink.SinkHandler
}
func NewGroupCalc() *GroupCalc {
func NewGroupCalc(configHelper *common_utils.ConfigHelper) *GroupCalc {
calcTaskManager := &GroupCalc{
stage: stages.NewStage("测点分组计算"),
configHelper: GetConfigHelper(),
configHelper: configHelper,
signCalc: make(chan bool),
esHandler: et_sink.NewSinkGroupHandler(),
}
// 添加到 etNode 处理环节,实现数据加工 (缓存group各分项的主题数据 -> 分组计算、分组数据存储ES)
calcTaskManager.stage.AddProcess(calcTaskManager.processData)
calcTaskManager.stage.AddProcess(calcTaskManager.calcGroups)
return calcTaskManager
}
@ -79,9 +79,25 @@ func (gc *GroupCalc) GetStage() stages.Stage {
return *gc.stage
}
func (gc *GroupCalc) calcGroups(data []*common_models.ProcessData) []*common_models.ProcessData {
var wg sync.WaitGroup
result := make([]*common_models.ProcessData, len(data))
for i, p := range data {
wg.Add(1)
go func(i int, p *common_models.ProcessData) {
defer wg.Done()
result[i] = gc.processData(p)
}(i, p)
}
wg.Wait()
return result
}
// processData 的 stations 被改变了
func (gc *GroupCalc) processData(inData *common_models.ProcessData) *common_models.ProcessData {
log.Printf("************* 82行 分组一次处理开始 len(inData.Stations)=%d *************", len(inData.Stations))
log.Printf("********* 分组一次处理开始 len(inData.Stations)=%d *************", len(inData.Stations))
resultStations := make([]common_models.Station, 0)
// 分组超时任务
resultStations = append(resultStations, gc.processTimeoutTasks()...)
@ -95,12 +111,12 @@ func (gc *GroupCalc) processData(inData *common_models.ProcessData) *common_mode
calcRet, err := gc.cacheAndCalc(&station, inData.DeviceData.DimensionId, inData.DeviceData.TaskId, inData.DeviceData.AcqTime)
if err == nil {
log.Printf("************* 95行 沉降分组计算成功,返回记录数 len(calcRet)={%d} *************", len(calcRet))
log.Printf("******* 沉降分组计算成功,返回记录数 len(calcRet)={%d} *************", len(calcRet))
resultStations = append(resultStations, calcRet...)
} else {
println(err)
}
log.Printf("************* 98行 分组一次处理,缓存记录数 len(resultStations)={%d} *************", len(resultStations))
log.Printf("******** 分组一次处理,缓存记录数 len(resultStations)={%d} *************", len(resultStations))
}
}
@ -112,22 +128,22 @@ func (gc *GroupCalc) processData(inData *common_models.ProcessData) *common_mode
//filterSensorIds := []int{18, 20, 21}
//for _, number := range filterSensorIds {
// if number == station.Info.Id {
// log.Printf("*************** 110行 沉降分组测点有返回 %v %v %v %+v *************", station.Info.Id, station.Info.Name, station.Data.CollectTime, station.Data)
// log.Printf("******* 122行 沉降分组测点有返回 %v %v %v %+v *************", station.Info.Id, station.Info.Name, station.Data.CollectTime, station.Data)
// break
// }
//}
//// #TEST END
// 需要返回的测点
//log.Printf("*************** 121行 沉降分组测点有返回 %v %v %v %+v *************", station.Info.Id, station.Info.Name, station.Data.CollectTime, station.Data)
//log.Printf("******* 128行 沉降分组测点有返回 %v %v %v %+v *************", station.Info.Id, station.Info.Name, station.Data.CollectTime, station.Data)
result = append(result, station)
} else {
log.Printf("*************** 119行 分组计算异常的测点数据 %v %v %v %+v *************", station.Info.Id, station.Info.Name, station.Data.CollectTime, station.Data)
log.Printf("****** 分组计算异常的测点数据 %v %v %v %+v *************", station.Info.Id, station.Info.Name, station.Data.CollectTime, station.Data)
}
}
// 计算后的 result 可能为空
log.Printf("************* 124行 分组一次处理结束 len(inData.Stations)=%d *************", len(result))
log.Printf("****** 分组一次处理结束 len(inData.Stations)=%d *************", len(result))
inData.Stations = result
return inData
}

et_print/printHandler.go (9 changes)

@ -19,10 +19,17 @@ func NewPrintHandler() *PrintHandler {
stage: stages.NewStage("测试打印"),
}
the.stage.AddProcess(the.print)
the.stage.AddProcess(the.printDatas)
return the
}
func (the *PrintHandler) printDatas(data []*common_models.ProcessData) []*common_models.ProcessData {
for _, processData := range data {
the.print(processData)
}
return data
}
func (the *PrintHandler) print(p *common_models.ProcessData) *common_models.ProcessData {
log.Printf("处理设备[%s]数据", p.DeviceData.Name)

et_push/pushHandler.go (52 changes)

@ -13,7 +13,7 @@ import (
)
type dataOut struct {
topic string
StructId int `json:"structId"`
Id int `json:"id"`
Name string `json:"name"`
Data map[string]any `json:"data"`
@ -40,7 +40,7 @@ func NewPushHandler() *PushHandler {
the := &PushHandler{
stage: stages.NewStage("测点数据推送"),
signBatch: make(chan bool, 1),
batchCount: 500,
batchCount: 100,
}
the.addClients()
go the.publishBatchMonitor()
@ -64,6 +64,7 @@ func (the *PushHandler) addClients() {
)
the.pushClients = append(the.pushClients, mq)
}
kafkaEnable := configLoad.LoadConfig().GetBool("push.kafka.enable")
if kafkaEnable {
kafkaBrokers := configLoad.LoadConfig().GetStringSlice("push.kafka.brokers")
@ -71,26 +72,32 @@ func (the *PushHandler) addClients() {
the.pushClients = append(the.pushClients, ka)
}
}
func (the *PushHandler) push(p *common_models.ProcessData) *common_models.ProcessData {
func (the *PushHandler) push(data []*common_models.ProcessData) []*common_models.ProcessData {
if len(the.pushClients) == 0 {
return p
return data
}
for _, station := range p.Stations {
dataPush := dataOut{
topic: fmt.Sprintf("etpush/%d/%d", station.Info.StructureId, station.Info.Id),
Id: station.Info.Id,
Name: station.Info.Name,
Data: station.Data.ThemeData,
CollectTime: station.Data.CollectTime,
for _, p := range data {
for _, station := range p.Stations {
dataPush := dataOut{
//topic: fmt.Sprintf("etpush/%d/%d", station.Info.StructureId, station.Info.Id),
StructId: station.Info.StructureId,
Id: station.Info.Id,
Name: station.Info.Name,
Data: station.Data.ThemeData,
CollectTime: station.Data.CollectTime,
}
the.dataQueue = append(the.dataQueue, dataPush)
}
the.dataQueue = append(the.dataQueue, dataPush)
}
if len(the.dataQueue) >= the.batchCount {
log.Printf("推送队列 len=%d > %d,触发 批信号", len(the.dataQueue), the.batchCount)
the.signBatch <- true
}
return p
return data
}
func (the *PushHandler) publish(dataArrayOut []dataOut) {
@ -98,10 +105,24 @@ func (the *PushHandler) publish(dataArrayOut []dataOut) {
log.Printf("[client-%d]publish %d 条数据", i, len(dataArrayOut))
for _, out := range dataArrayOut {
outBytes, _ := json.Marshal(out)
client.Publish(out.topic, outBytes)
topic := fmt.Sprintf("etpush/%d/%d", out.StructId, out.Id)
client.Publish(topic, outBytes)
}
}
}
func (the *PushHandler) publishArray(dataArrayOut []dataOut) {
outBytes, err := json.Marshal(dataArrayOut)
if err != nil {
log.Printf("JSON Marshal error: %v", err)
return
}
// 遍历每个客户端并推送数据
for i, client := range the.pushClients {
log.Printf("[client-%d] publish %d 条数据", i, len(dataArrayOut))
client.Publish("etpush/0/0", outBytes)
}
}
func (the *PushHandler) publishBatchMonitor() {
@ -116,7 +137,8 @@ func (the *PushHandler) publishBatchMonitor() {
count := len(the.dataQueue)
needPush := the.dataQueue[:count]
the.dataQueue = the.dataQueue[count:]
go the.publish(needPush)
//go the.publish(needPush)
go the.publishArray(needPush)
}
}
}

et_rpc/go.mod (15 changes)

@ -0,0 +1,15 @@
module et_rpc
go 1.23.1
require (
google.golang.org/grpc v1.69.4
google.golang.org/protobuf v1.36.2
)
require (
golang.org/x/net v0.30.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.19.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 // indirect
)

et_rpc/go.sum (32 changes)

@ -0,0 +1,32 @@
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY=
go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE=
go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE=
go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY=
go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk=
go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0=
go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc=
go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8=
go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys=
go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 h1:X58yt85/IXCx0Y3ZwN6sEIKZzQtDEYaBWrDvErdXrRE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI=
google.golang.org/grpc v1.69.4 h1:MF5TftSMkd8GLw/m0KM6V8CMOCY6NZ1NQDPGFgbTt4A=
google.golang.org/grpc v1.69.4/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4=
google.golang.org/protobuf v1.36.2 h1:R8FeyR1/eLmkutZOM5CWghmo5itiG9z0ktFlTVLuTmU=
google.golang.org/protobuf v1.36.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=

et_rpc/pb/et_rpc.pb.go (1147 changes)

File diff suppressed because it is too large

et_rpc/pb/et_rpc_grpc.pb.go (349 changes)

@ -0,0 +1,349 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.5.1
// - protoc v5.26.1
// source: et_rpc.proto
package pb
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
const (
NodeService_HandleIotaData_FullMethodName = "/et_rpc.NodeService/HandleIotaData"
NodeService_HandleAggData_FullMethodName = "/et_rpc.NodeService/HandleAggData"
)
// NodeServiceClient is the client API for NodeService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// NodeService 定义
type NodeServiceClient interface {
// 处理 Iota 数据并返回节点响应
HandleIotaData(ctx context.Context, in *HandleDataRequest, opts ...grpc.CallOption) (*HandleDataResponse, error)
// 处理聚集数据并返回节点响应
HandleAggData(ctx context.Context, in *HandleDataRequest, opts ...grpc.CallOption) (*HandleDataResponse, error)
}
type nodeServiceClient struct {
cc grpc.ClientConnInterface
}
func NewNodeServiceClient(cc grpc.ClientConnInterface) NodeServiceClient {
return &nodeServiceClient{cc}
}
func (c *nodeServiceClient) HandleIotaData(ctx context.Context, in *HandleDataRequest, opts ...grpc.CallOption) (*HandleDataResponse, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(HandleDataResponse)
err := c.cc.Invoke(ctx, NodeService_HandleIotaData_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *nodeServiceClient) HandleAggData(ctx context.Context, in *HandleDataRequest, opts ...grpc.CallOption) (*HandleDataResponse, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(HandleDataResponse)
err := c.cc.Invoke(ctx, NodeService_HandleAggData_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
// NodeServiceServer is the server API for NodeService service.
// All implementations must embed UnimplementedNodeServiceServer
// for forward compatibility.
//
// NodeService 定义
type NodeServiceServer interface {
// 处理 Iota 数据并返回节点响应
HandleIotaData(context.Context, *HandleDataRequest) (*HandleDataResponse, error)
// 处理聚集数据并返回节点响应
HandleAggData(context.Context, *HandleDataRequest) (*HandleDataResponse, error)
mustEmbedUnimplementedNodeServiceServer()
}
// UnimplementedNodeServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedNodeServiceServer struct{}
func (UnimplementedNodeServiceServer) HandleIotaData(context.Context, *HandleDataRequest) (*HandleDataResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method HandleIotaData not implemented")
}
func (UnimplementedNodeServiceServer) HandleAggData(context.Context, *HandleDataRequest) (*HandleDataResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method HandleAggData not implemented")
}
func (UnimplementedNodeServiceServer) mustEmbedUnimplementedNodeServiceServer() {}
func (UnimplementedNodeServiceServer) testEmbeddedByValue() {}
// UnsafeNodeServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to NodeServiceServer will
// result in compilation errors.
type UnsafeNodeServiceServer interface {
mustEmbedUnimplementedNodeServiceServer()
}
func RegisterNodeServiceServer(s grpc.ServiceRegistrar, srv NodeServiceServer) {
// If the following call pancis, it indicates UnimplementedNodeServiceServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&NodeService_ServiceDesc, srv)
}
func _NodeService_HandleIotaData_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(HandleDataRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(NodeServiceServer).HandleIotaData(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: NodeService_HandleIotaData_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(NodeServiceServer).HandleIotaData(ctx, req.(*HandleDataRequest))
}
return interceptor(ctx, in, info, handler)
}
func _NodeService_HandleAggData_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(HandleDataRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(NodeServiceServer).HandleAggData(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: NodeService_HandleAggData_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(NodeServiceServer).HandleAggData(ctx, req.(*HandleDataRequest))
}
return interceptor(ctx, in, info, handler)
}
// NodeService_ServiceDesc is the grpc.ServiceDesc for NodeService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var NodeService_ServiceDesc = grpc.ServiceDesc{
ServiceName: "et_rpc.NodeService",
HandlerType: (*NodeServiceServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "HandleIotaData",
Handler: _NodeService_HandleIotaData_Handler,
},
{
MethodName: "HandleAggData",
Handler: _NodeService_HandleAggData_Handler,
},
},
Streams: []grpc.StreamDesc{},
Metadata: "et_rpc.proto",
}
const (
MasterService_RegisterNode_FullMethodName = "/et_rpc.MasterService/RegisterNode"
MasterService_HeartbeatNode_FullMethodName = "/et_rpc.MasterService/HeartbeatNode"
MasterService_UnregisterNode_FullMethodName = "/et_rpc.MasterService/UnregisterNode"
)
// MasterServiceClient is the client API for MasterService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// MasterService 定义
type MasterServiceClient interface {
RegisterNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error)
HeartbeatNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error)
UnregisterNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error)
}
type masterServiceClient struct {
cc grpc.ClientConnInterface
}
func NewMasterServiceClient(cc grpc.ClientConnInterface) MasterServiceClient {
return &masterServiceClient{cc}
}
func (c *masterServiceClient) RegisterNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(RpcResponse)
err := c.cc.Invoke(ctx, MasterService_RegisterNode_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *masterServiceClient) HeartbeatNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(RpcResponse)
err := c.cc.Invoke(ctx, MasterService_HeartbeatNode_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *masterServiceClient) UnregisterNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(RpcResponse)
err := c.cc.Invoke(ctx, MasterService_UnregisterNode_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
// MasterServiceServer is the server API for MasterService service.
// All implementations must embed UnimplementedMasterServiceServer
// for forward compatibility.
//
// MasterService 定义
type MasterServiceServer interface {
RegisterNode(context.Context, *NodeRequest) (*RpcResponse, error)
HeartbeatNode(context.Context, *NodeRequest) (*RpcResponse, error)
UnregisterNode(context.Context, *NodeRequest) (*RpcResponse, error)
mustEmbedUnimplementedMasterServiceServer()
}
// UnimplementedMasterServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedMasterServiceServer struct{}
func (UnimplementedMasterServiceServer) RegisterNode(context.Context, *NodeRequest) (*RpcResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method RegisterNode not implemented")
}
func (UnimplementedMasterServiceServer) HeartbeatNode(context.Context, *NodeRequest) (*RpcResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method HeartbeatNode not implemented")
}
func (UnimplementedMasterServiceServer) UnregisterNode(context.Context, *NodeRequest) (*RpcResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method UnregisterNode not implemented")
}
func (UnimplementedMasterServiceServer) mustEmbedUnimplementedMasterServiceServer() {}
func (UnimplementedMasterServiceServer) testEmbeddedByValue() {}
// UnsafeMasterServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to MasterServiceServer will
// result in compilation errors.
type UnsafeMasterServiceServer interface {
mustEmbedUnimplementedMasterServiceServer()
}
func RegisterMasterServiceServer(s grpc.ServiceRegistrar, srv MasterServiceServer) {
// If the following call pancis, it indicates UnimplementedMasterServiceServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&MasterService_ServiceDesc, srv)
}
func _MasterService_RegisterNode_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(NodeRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(MasterServiceServer).RegisterNode(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: MasterService_RegisterNode_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(MasterServiceServer).RegisterNode(ctx, req.(*NodeRequest))
}
return interceptor(ctx, in, info, handler)
}
func _MasterService_HeartbeatNode_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(NodeRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(MasterServiceServer).HeartbeatNode(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: MasterService_HeartbeatNode_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(MasterServiceServer).HeartbeatNode(ctx, req.(*NodeRequest))
}
return interceptor(ctx, in, info, handler)
}
func _MasterService_UnregisterNode_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(NodeRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(MasterServiceServer).UnregisterNode(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: MasterService_UnregisterNode_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(MasterServiceServer).UnregisterNode(ctx, req.(*NodeRequest))
}
return interceptor(ctx, in, info, handler)
}
// MasterService_ServiceDesc is the grpc.ServiceDesc for MasterService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var MasterService_ServiceDesc = grpc.ServiceDesc{
ServiceName: "et_rpc.MasterService",
HandlerType: (*MasterServiceServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "RegisterNode",
Handler: _MasterService_RegisterNode_Handler,
},
{
MethodName: "HeartbeatNode",
Handler: _MasterService_HeartbeatNode_Handler,
},
{
MethodName: "UnregisterNode",
Handler: _MasterService_UnregisterNode_Handler,
},
},
Streams: []grpc.StreamDesc{},
Metadata: "et_rpc.proto",
}
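
As a usage illustration only (not part of this change), a caller of the generated NodeService client could look roughly like the sketch below; the node address, timeout, and payload values are assumptions, while the pb identifiers come from the generated file above.

// sketch_client.go — hypothetical caller of the generated NodeService client (assumption, not in this change)
package main

import (
    "context"
    "log"
    "time"

    "et_rpc/pb"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

func main() {
    // The node address is an assumption; the real address comes from node registration.
    conn, err := grpc.NewClient("127.0.0.1:40000", grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatalf("dial node: %v", err)
    }
    defer conn.Close()

    client := pb.NewNodeServiceClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    resp, err := client.HandleIotaData(ctx, &pb.HandleDataRequest{
        Id:       "thing-1",                           // hypothetical thingId
        Messages: []string{`{"id":"1","value":10.5}`}, // JSON payloads, as in rpc_test.go
    })
    if err != nil {
        log.Fatalf("HandleIotaData: %v", err)
    }
    log.Printf("node %s replied status=%v load=%d", resp.Addr, resp.Status, resp.Load)
}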

et_rpc/proto/et_rpc.proto (124 changes)

@ -0,0 +1,124 @@
syntax = "proto3";
package et_rpc;
option go_package = "/pb";
message NodeArgs{
string id = 1; // ID
string addr = 2; //
int32 load = 3; //
string resource_json = 4; // CPU\内存\硬盘 使用情况JSON
int32 weight = 5; //
NodeState status = 6; //
RPCReplyCode err_code = 7; //
string err_message = 8; //
}
message NodeResponse {
string id = 1;
string addr = 2;
RPCReplyCode err_code = 3;
string err_message = 4;
}
message NodeRegistrationRequest {
string node_id = 1;
string node_addr = 2;
}
message NodeStatusRequest {
string node_id = 1;
}
message NodeStatusResponse {
string node_id = 1;
string status = 2; // "active", "inactive"
}
//
enum NodeState {
UNKNOWN = 0;
ACTIVE = 1;
INACTIVE = 2;
}
enum RPCReplyCode {
SUCCESS = 0;
FAILURE = 1;
}
//
message KeyValue {
string key = 1; //
double value = 2; //
}
// AggData消息
message AggData {
string date = 1; // 使protobuf的时间戳类型
int32 sensor_id = 2; // SensorId
int32 struct_id = 3; // StructId
int32 factor_id = 4; // FactorId
int32 agg_type_id = 5; //
int32 agg_method_id = 6; //
repeated KeyValue agg = 7; //
repeated KeyValue changed = 8; //
string thing_id = 9; // ThingId
}
message NodeRequest{
string id = 1;
string address = 2;
repeated string thing_ids = 3;
}
message RpcResponse {
enum Status {
SUCCESS = 0; //
FAILURE = 1; //
INVALID_ARGUMENT = 2; //
NOT_FOUND = 3; //
INTERNAL_ERROR = 4; //
//
}
Status status = 1; //
string error_message = 2; //
}
message HandleDataResponse{
enum Status {
SUCCESS = 0; //
FAILURE = 1; //
INVALID_ARGUMENT = 2; //
INTERNAL_ERROR = 4; //
//
}
string addr = 1; //
int32 load = 2; //
Status status = 3; //
string error_message = 4; //
}
message HandleDataRequest {
string id = 1;
repeated string messages = 2;
}
// NodeService
service NodeService {
// Iota
rpc HandleIotaData(HandleDataRequest) returns (HandleDataResponse);
//
rpc HandleAggData(HandleDataRequest) returns (HandleDataResponse);
}
// MasterService
service MasterService {
rpc RegisterNode(NodeRequest) returns (RpcResponse);
rpc HeartbeatNode(NodeRequest) returns (RpcResponse);
rpc UnregisterNode(NodeRequest) returns (RpcResponse);
}
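
For orientation only, a minimal node-side implementation of the NodeService defined above might look like the following sketch; the listen port and log messages are assumptions, and the generated pb types are the ones shown earlier.

// sketch_node_server.go — hypothetical NodeService implementation (assumption, not in this change)
package main

import (
    "context"
    "log"
    "net"

    "et_rpc/pb"

    "google.golang.org/grpc"
)

type nodeServer struct {
    pb.UnimplementedNodeServiceServer // embedded by value, as the generated code requires
}

func (s *nodeServer) HandleIotaData(ctx context.Context, req *pb.HandleDataRequest) (*pb.HandleDataResponse, error) {
    log.Printf("iota: %d messages for id=%s", len(req.Messages), req.Id)
    return &pb.HandleDataResponse{Status: pb.HandleDataResponse_SUCCESS}, nil
}

func (s *nodeServer) HandleAggData(ctx context.Context, req *pb.HandleDataRequest) (*pb.HandleDataResponse, error) {
    log.Printf("agg: %d messages for id=%s", len(req.Messages), req.Id)
    return &pb.HandleDataResponse{Status: pb.HandleDataResponse_SUCCESS}, nil
}

func main() {
    lis, err := net.Listen("tcp", ":40000") // port is an assumption
    if err != nil {
        log.Fatalf("listen: %v", err)
    }
    s := grpc.NewServer()
    pb.RegisterNodeServiceServer(s, &nodeServer{})
    if err := s.Serve(lis); err != nil {
        log.Fatalf("serve: %v", err)
    }
}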

et_rpc/proto/go-protobuf生成帮助.txt (58 changes)

@ -0,0 +1,58 @@
gRPC https://grpc.org.cn/docs/guides/cancellation/
go get google.golang.org/grpc
go get google.golang.org/protobuf/cmd/protoc-gen-go
go get google.golang.org/grpc/cmd/protoc-gen-go-grpc
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
If code generation fails, troubleshoot as follows:
Check which grpc/protobuf modules are pulled in
go list -m all | findstr grpc
go list -m all | findstr protobuf
Remove the unnecessary dependencies
go get -u github.com/gogo/protobuf@none
go get -u github.com/golang/protobuf@none
go get -u github.com/matttproud/golang_protobuf_extensions@none
Update go.mod
go mod tidy
Regenerate the code by running the following from a terminal:
cd et_rpc
protoc --proto_path=proto --go_out=./pb --go_opt=paths=source_relative --go-grpc_out=./pb --go-grpc_opt=paths=source_relative proto/*.proto
==============================================================
1. Generating Go code with the Protobuf compiler protoc
Basic protoc invocation: protoc --proto_path=IMPORT_PATH --go_out=OUTPUT_PATH --go_opt=paths=source_relative your_proto_file.proto
Parameter notes
--proto_path=IMPORT_PATH: search path for .proto files; pass --proto_path more than once to add multiple paths.
--go_out=OUTPUT_PATH: output path for the generated Go code; "." means the current directory.
--go_opt=paths=source_relative: keeps each generated Go file's package path aligned with the relative path of its .proto file; generally recommended.
your_proto_file.proto: the .proto file to compile.
***** Build commands for the ET-GO system (run after cd into et-go/et_rpc)
1) Generate only the message types, their (de)serialization methods, and other helpers:
protoc --proto_path=proto --go_out=./pb --go_opt=paths=source_relative proto/*.proto
2) Generate the message types, (de)serialization methods, the gRPC service code, and the gRPC method stubs:
protoc --proto_path=proto --go_out=./pb --go_opt=paths=source_relative --go-grpc_out=./pb --go-grpc_opt=paths=source_relative proto/*.proto
Parameter notes
--proto_path=proto: search for .proto files in the proto directory.
--go_out=./pb: write the generated Go code to the pb directory.
--go_opt=paths=source_relative: keep the generated Go files' package paths aligned with the relative paths of the .proto files.
proto/*.proto: use the *.proto wildcard to compile every .proto file under proto.
/et-go
├── et_rpc
│   ├── pb                      # generated Go files
│   │   └── iota_data.pb.go     # generated Go file
│   └── proto                   # Protobuf files
│       └── iota_data.proto     # IotaData Protobuf file
└── others
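
If desired, the generation command documented above could also be wired into go generate; the file below is a hedged sketch of that idea (hypothetical helper, not added by these commits).

// et_rpc/generate.go — hypothetical go:generate wrapper for the documented protoc command
package et_rpc

//go:generate protoc --proto_path=proto --go_out=./pb --go_opt=paths=source_relative --go-grpc_out=./pb --go-grpc_opt=paths=source_relative proto/et_rpc.proto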

et_rpc/rpc.go (22 changes)

@ -0,0 +1,22 @@
package et_rpc
type RPCReplyCode int
type NodeState int
const (
NodeState_Healthy NodeState = iota
NodeState_Unhealthy
)
type NodeArgs struct {
ID string
Addr string
Load int // 节点荷载(主要为积压的数据量)
ResourceJson string // CPU\内存\硬盘 使用情况JSON
Weight int // 权重(暂时未用到)
Status NodeState // 节点状态(健康 | 不健康)
ErrCode RPCReplyCode // RPCReply_Success | RPCReply_Failure
ErrMessage string
}

et_rpc/rpc_test.go (57 changes)

@ -0,0 +1,57 @@
package et_rpc
import (
"encoding/json"
"et_rpc/pb"
"fmt"
"google.golang.org/protobuf/proto"
"log"
"testing"
)
// TestProtoJSONConversion 测试 Protobuf 和 JSON 之间的转换
func TestProtoJSONConversion(t *testing.T) {
// 创建请求
request := createHandleDataRequest()
// 序列化请求为 Protobuf 格式
data, err := proto.Marshal(request)
if err != nil {
log.Fatalf("Failed to marshal request: %v", err)
}
// 打印序列化后的数据
fmt.Println("Serialized HandleDataRequest:", data)
// 这里可以将序列化后的数据发送到 gRPC 服务
// 例如:client.HandleIotaData(context.Background(), request)
}
func createJSONData(id string, value float64) (string, error) {
data := map[string]interface{}{
"id": id,
"value": value,
}
jsonData, err := json.Marshal(data)
if err != nil {
return "", err
}
return string(jsonData), nil
}
func createHandleDataRequest() *pb.HandleDataRequest {
request := &pb.HandleDataRequest{}
// 添加 JSON 数据消息
json1, _ := createJSONData("1", 10.5)
json2, _ := createJSONData("2", 20.3)
json3, _ := createJSONData("3", 15.8)
request.Messages = append(request.Messages, json1)
request.Messages = append(request.Messages, json2)
request.Messages = append(request.Messages, json3)
return request
}
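
To close the loop that the test above starts, a hedged sketch of the reverse direction (Protobuf bytes back into a HandleDataRequest) could be added alongside it; it reuses the testing, proto, and pb imports already present in rpc_test.go and is not part of this change.

// Hypothetical companion test (assumption): round-trips the request through proto.Marshal/Unmarshal.
func TestProtoRoundTrip(t *testing.T) {
    request := createHandleDataRequest()

    data, err := proto.Marshal(request)
    if err != nil {
        t.Fatalf("marshal: %v", err)
    }

    var decoded pb.HandleDataRequest
    if err := proto.Unmarshal(data, &decoded); err != nil {
        t.Fatalf("unmarshal: %v", err)
    }
    if len(decoded.Messages) != len(request.Messages) {
        t.Fatalf("want %d messages, got %d", len(request.Messages), len(decoded.Messages))
    }
}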

et_sink/sinkHandler.go (161 changes)

@ -25,10 +25,10 @@ type SinkHandler struct {
const defaultBatchCount = 1000
func NewSinkThemeHandler() *SinkHandler {
func NewSinkThemeHandler(storageConsumers []storageDBs.IStorageConsumer) *SinkHandler {
the := &SinkHandler{
stage: stages.NewStage("Theme 数据存储"),
storageConsumers: storageDBs.LoadIStorageConsumer(),
storageConsumers: storageConsumers,
dataQueueTheme: make([]common_models.EsTheme, 0, defaultBatchCount),
lock: &sync.RWMutex{},
signBatch: make(chan bool, 1),
@ -40,15 +40,14 @@ func NewSinkThemeHandler() *SinkHandler {
return the
}
func NewSinkRawHandler() *SinkHandler {
esAddresses := configLoad.LoadConfig().GetStringSlice("es.addresses")
log.Printf("es addresses: %v", esAddresses)
func NewSinkRawHandler(storageConsumers []storageDBs.IStorageConsumer) *SinkHandler {
the := &SinkHandler{
stage: stages.NewStage("raws 数据存储"),
storageConsumers: storageDBs.LoadIStorageConsumer(),
storageConsumers: storageConsumers,
dataQueueRaw: make([]common_models.EsRaw, 0, defaultBatchCount),
dataQueueVib: make([]common_models.EsVbRaw, 0, defaultBatchCount),
lock: &sync.RWMutex{},
signBatch: make(chan bool, 1),
batchCount: defaultBatchCount,
}
go the.dumpRawBatchMonitor()
@ -60,10 +59,114 @@ func (the *SinkHandler) GetStage() stages.Stage {
return *the.stage
}
func (the *SinkHandler) sinkRawData(p *common_models.ProcessData) *common_models.ProcessData {
go the.sinkRawDataToES(p.DeviceData)
return p
//func (the *SinkHandler) sinkRawData(p *common_models.ProcessData) *common_models.ProcessData {
// go the.sinkRawDataToES(p.DeviceData)
// return p
//}
func (the *SinkHandler) sinkRawData(data []*common_models.ProcessData) []*common_models.ProcessData {
go func() {
dataQueueRaw := make([]common_models.EsRaw, 0, len(data))
dataQueueVib := make([]common_models.EsVbRaw, 0, len(data))
for _, p := range data {
deviceData := p.DeviceData
switch deviceData.DataType {
case common_models.RawTypeVib:
vibData := deviceData.GetVibrationData()
vbRaws := common_models.EsVbRaw{
StructId: deviceData.StructId,
IotaDeviceName: deviceData.Name,
Param: vibData.FormatParams(),
Data: map[string]any{"raw": vibData.Data},
CollectTime: deviceData.AcqTime.Truncate(time.Millisecond),
IotaDevice: deviceData.DeviceId,
CreateTime: time.Now().Truncate(time.Millisecond),
}
dataQueueVib = append(dataQueueVib, vbRaws)
case common_models.RawTypeDiag:
// 处理诊断数据的逻辑
default:
if deviceData.Raw == nil {
msg, _ := json.Marshal(deviceData)
log.Printf("异常空,raw数据 =%s", string(msg))
} else {
esRaws := common_models.EsRaw{
StructId: deviceData.StructId,
IotaDeviceName: deviceData.Name,
Data: deviceData.Raw,
CollectTime: deviceData.AcqTime.Truncate(time.Millisecond),
Meta: deviceData.DeviceInfo.DeviceMeta.GetOutputProps(),
IotaDevice: deviceData.DeviceId,
CreateTime: time.Now().Truncate(time.Millisecond),
}
dataQueueRaw = append(dataQueueRaw, esRaws)
}
}
}
if len(dataQueueRaw) > 0 {
count := len(dataQueueRaw)
log.Printf("es写入dataQueueRaw数据 count====> %d", count)
go the.dumpRaws(dataQueueRaw)
}
if len(dataQueueVib) > 0 {
log.Printf("es写入dataQueueVib数据 count====> %d", len(dataQueueVib))
go the.dumpVibRaws(dataQueueVib)
}
}()
return data
}
func (the *SinkHandler) sinkRawData2(data []*common_models.ProcessData) []*common_models.ProcessData {
go func() {
the.lock.Lock()
for _, p := range data {
deviceData := p.DeviceData
switch deviceData.DataType {
case common_models.RawTypeVib:
vibData := deviceData.GetVibrationData()
vbRaws := common_models.EsVbRaw{
StructId: deviceData.StructId,
IotaDeviceName: deviceData.Name,
Param: vibData.FormatParams(),
Data: map[string]any{"raw": vibData.Data},
CollectTime: deviceData.AcqTime.Truncate(time.Millisecond),
IotaDevice: deviceData.DeviceId,
CreateTime: time.Now().Truncate(time.Millisecond),
}
the.dataQueueVib = append(the.dataQueueVib, vbRaws)
case common_models.RawTypeDiag:
default:
if deviceData.Raw == nil {
msg, _ := json.Marshal(deviceData)
log.Printf("异常空,raw数据 =%s", string(msg))
} else {
esRaws := common_models.EsRaw{
StructId: deviceData.StructId,
IotaDeviceName: deviceData.Name,
Data: deviceData.Raw,
CollectTime: deviceData.AcqTime.Truncate(time.Millisecond),
Meta: deviceData.DeviceInfo.DeviceMeta.GetOutputProps(),
IotaDevice: deviceData.DeviceId,
CreateTime: time.Now().Truncate(time.Millisecond),
}
the.dataQueueRaw = append(the.dataQueueRaw, esRaws)
}
}
}
the.lock.Unlock()
if len(the.dataQueueRaw) >= the.batchCount || len(the.dataQueueVib) >= the.batchCount {
the.signBatch <- true
}
}()
return data
}
func (the *SinkHandler) sinkRawDataToES(deviceData common_models.DeviceData) {
the.lock.Lock()
switch deviceData.DataType {
@ -81,7 +184,6 @@ func (the *SinkHandler) sinkRawDataToES(deviceData common_models.DeviceData) {
the.dataQueueVib = append(the.dataQueueVib, vbRaws)
case common_models.RawTypeDiag:
default:
if deviceData.Raw == nil {
msg, _ := json.Marshal(deviceData)
log.Printf("异常空,raw数据 =%s", string(msg))
@ -109,7 +211,7 @@ func (the *SinkHandler) dumpRawBatchMonitor() {
select {
case <-the.signBatch:
log.Printf("批存储信号raw,监控器收到")
case <-time.After(200 * time.Millisecond):
case <-time.After(500 * time.Millisecond):
}
if len(the.dataQueueRaw) > 0 {
the.lock.RLock()
@ -172,10 +274,40 @@ func (the *SinkHandler) dumpThemeBatchMonitor() {
}
func (the *SinkHandler) sinkThemeToES(p *common_models.ProcessData) *common_models.ProcessData {
go the.sinkThemeData(p.Stations)
return p
func (the *SinkHandler) sinkThemeToES(data []*common_models.ProcessData) []*common_models.ProcessData {
go func() {
dataQueueTheme := make([]common_models.EsTheme, 0, len(data))
for _, p := range data {
stations := p.Stations
for _, station := range stations {
esTheme := common_models.EsTheme{
SensorName: station.Info.Name,
FactorName: station.Info.Factor.Name,
FactorProtoCode: station.Info.Proto.Code,
Data: station.Data.ThemeData,
FactorProtoName: station.Info.Proto.Name,
Factor: station.Info.FactorId,
CollectTime: station.Data.CollectTime.Truncate(time.Millisecond),
Sensor: station.Info.Id,
Structure: station.Info.StructureId,
IotaDevice: station.Info.GetDeviceIdArray(),
CreateTime: time.Now().Truncate(time.Millisecond),
}
dataQueueTheme = append(dataQueueTheme, esTheme)
}
}
if len(dataQueueTheme) > 0 {
count := len(dataQueueTheme)
log.Printf("es写入 dataQueueTheme 数据 count====> %d", count)
go the.dumpThemes(dataQueueTheme)
}
}()
return data
}
func (the *SinkHandler) sinkThemeData(stations []common_models.Station) {
the.lock.Lock()
for _, station := range stations {
@ -223,7 +355,6 @@ func NewSinkGroupHandler() *SinkHandler {
}
go the.dumpGroupMonitor()
//the.stage.AddProcess(the.sinkGroupDataToES)
return the
}

master/app/app.go (25 changes)

@ -1,8 +1,8 @@
package app
import (
"dataSource/kafka"
"log"
"time"
)
func init() {
@ -10,16 +10,25 @@ func init() {
}
func Start() {
// 启动 master 服务
master := NewEtMaster()
go master.RegisterListen()
master := NewETMaster()
go master.StartRPCServer()
// 设置 Kafka 消费者配置信息
master.InitKafkaDataSource()
//等待node注册
master.WaitNodeRegister()
println("=======")
// -> 源数据
kafkaDataSource := kafka.NewKafkaDataSource()
go kafkaDataSource.Producer()
// 发布数据
go master.AggDataPublishing()
go master.RawDataPublishing()
time.Sleep(2 * time.Second)
// Kafka 数据消费与处理
go master.dataSource.RawDataProducer()
go master.dataSource.AggDataProducer()
// 将源数据 -> 各类型节点处理
master.DistributeData(kafkaDataSource.DataChannels)
// 监控系统关闭
master.MonitorShutdown()
}

master/app/et_master.go (635 changes)

@ -1,403 +1,398 @@
package app
import (
"dataSource"
"encoding/gob"
"errors"
"et_prometheus_exporter"
"et_rpc/pb"
"fmt"
"gitea.anxinyun.cn/container/common_models"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"github.com/panjf2000/ants/v2"
"google.golang.org/grpc"
"google.golang.org/grpc/health"
"google.golang.org/grpc/health/grpc_health_v1"
"log"
"math"
"master/data_source"
"master/node_manager"
"net"
"net/rpc"
"strings"
"os"
"os/signal"
"sync"
"sync/atomic"
"syscall"
"time"
)
type EtMaster struct {
nodeMap sync.Map
exporter et_prometheus_exporter.PrometheusExporter
sleepCH chan bool
type SendStatus struct {
inProgressCount int32 // 正在处理的消息计数
limitThreshold int32 // 限流阈值
receiving int32 // 是否接收消息(1表示接收,0表示暂停)
}
func NewEtMaster() *EtMaster {
master := EtMaster{
exporter: et_prometheus_exporter.NewPrometheusExporter(),
sleepCH: make(chan bool, 1),
}
return &master
}
// ETMaster 管理 Master 的核心逻辑
type ETMaster struct {
nodeManager *node_manager.NodeManager
grpcServer *grpc.Server
masterRPCService *MasterRPCService
type NodeRpc struct {
args *common_models.NodeArgs // 注册节点参数:RPC服务名为master, 服务方法 NodeRegister 的输入参数
resultCH chan int // 注册节点参数:RPC服务名为master, 服务方法 NodeRegister 的输出结果
aggResultCH chan int // 聚集数据被处理后的返回结果 对应 Reply 参数
client *rpc.Client
dataSource *data_source.KafkaDataSource
aggDataHandlers sync.Map // 聚合数据处理者
rawDataHandlers sync.Map // 原始数据处理者
aggSendStatus SendStatus // 聚合数据发送状态
rawSendStatus SendStatus // 原始数据发送状态
errRawChan chan []string
errMessagesKafkaProducer *data_source.KafkaProducer // Kafka 生产者,用于发送失败的消息
}
// RegisterListen 启动 master RPC服务
func (the *EtMaster) RegisterListen() {
//监听
err := rpc.RegisterName("master", the)
if err != nil {
log.Println("master 提供注册服务异常")
return
// 创建 ETMaster 实例
func NewETMaster() *ETMaster {
lb := node_manager.NewLoadBalancer(&node_manager.RoundRobinSelector{})
nodeManager := node_manager.NewNodeManager(lb)
grpcServer := grpc.NewServer()
masterRPCService := NewMasterRPCService(nodeManager)
pb.RegisterMasterServiceServer(grpcServer, masterRPCService)
healthServer := health.NewServer()
grpc_health_v1.RegisterHealthServer(grpcServer, healthServer)
healthServer.SetServingStatus("MasterService", grpc_health_v1.HealthCheckResponse_SERVING)
return &ETMaster{
nodeManager: nodeManager,
grpcServer: grpcServer,
masterRPCService: masterRPCService,
}
}
func (mm *ETMaster) StartRPCServer() {
port := configLoad.LoadConfig().GetUint16("master.port")
listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
if err != nil {
log.Panic("master 启动 node服务注册功能异常")
log.Panicf("启动 Master RPC 服务失败: %v", err)
}
log.Printf("master 启动 node服务注册功能 :%d", port)
for {
//log.Println("master 监听新注册链接")
conn, err := listener.Accept()
if err != nil {
log.Println("master rpc Accept异常")
defer func() {
if err := listener.Close(); err != nil {
log.Printf("关闭监听器失败: %v", err)
}
log.Printf("master Accept注册链接 from node[%s]", conn.RemoteAddr())
go rpc.ServeConn(conn)
}()
log.Printf("启动 Master RPC 服务成功,服务端口:%d", port)
// 启动 gRPC 服务器
if err := mm.grpcServer.Serve(listener); err != nil {
log.Panicf("gRPC 服务器服务失败: %v", err)
}
}
// DistributeData 分发数据。
// 监听两个数据通道RawDataChan和AggDataChan,根据不同类型的数据通道接收到的数据,调用notifyData方法进行相应的处理操作。
func (the *EtMaster) DistributeData(dataChannels *dataSource.DataChannels) {
//数据类型注册
gob.Register([]interface{}{})
for {
log.Println("L74 nodeCount: %d", the.nodeMapCount())
if the.nodeMapCount() == 0 {
log.Printf("nodeList is empty!")
time.Sleep(time.Second * 10)
continue
}
// 初始化 Kafka 数据源
func (mm *ETMaster) InitKafkaDataSource() {
ds := data_source.NewKafkaDataSource() // 加载 kafka 相关的配置
select {
case stopEnable := <-the.sleepCH:
log.Println("L83 nodeCount: %d", the.nodeMapCount())
if stopEnable {
stopTime := time.Second * 10
log.Printf("node 处理积压,%v,master 暂停 %v", stopEnable, stopTime)
time.Sleep(stopTime)
} else {
log.Printf("node 处理积压,%v,不正常空数据", stopEnable)
}
default:
// 创建 kafka 生产者实例
producer, err := data_source.NewKafkaProducer(ds.Brokers)
if err != nil {
log.Fatalf("创建 Kafka 生产者失败: %v", err)
}
mm.errMessagesKafkaProducer = producer
// 设置 rawData 的处理者,每个分区一个处理者
if ds.Master_kafkaConsumer_config.RawData != nil {
topicCfg := ds.Topics["data_raw"]
for partId := 0; partId < topicCfg.Partitions; partId++ {
key := fmt.Sprintf("%s_%d", topicCfg.Topic, partId)
dataHandler := data_source.NewRawDataHandler(key, topicCfg.Topic, partId)
mm.rawDataHandlers.Store(key, dataHandler)
}
select {
case data := <-dataChannels.RawDataChan:
log.Println("L96 nodeCount: %d", the.nodeMapCount())
the.notifyData(&data, the.callNodeService)
case data := <-dataChannels.AggDataChan:
log.Println("L99 nodeCount: %d", the.nodeMapCount())
the.notifyData(&data, the.callNodeService)
//default:
// time.Sleep(100 * time.Millisecond)
// 发送失败的消息存入 DLP_DATA_RAW 主题)
dlpKey := "DLP_DATA_RAW"
mm.rawDataHandlers.Store(dlpKey, data_source.NewRawDataHandler(dlpKey, dlpKey, 0))
}
// 设置 aggData 的处理者,每个分区一个处理者
if ds.Master_kafkaConsumer_config.AggData != nil {
topicCfg := ds.Topics["data_agg"]
for partId := 0; partId < topicCfg.Partitions; partId++ {
key := fmt.Sprintf("%s_%d", topicCfg.Topic, partId)
dataHandler := data_source.NewAggDataHandler(key, topicCfg.Topic, partId)
mm.aggDataHandlers.Store(key, dataHandler)
}
// 发送失败的消息存入 DLP_DATA_AGG 主题
dlpKey := "DLP_DATA_AGG"
mm.rawDataHandlers.Store(dlpKey, data_source.NewRawDataHandler(dlpKey, dlpKey, 0))
}
ds.RawDataHandlers = &mm.rawDataHandlers
ds.AggDataHandlers = &mm.aggDataHandlers
mm.dataSource = ds
}
// 等待节点注册
func (mm *ETMaster) WaitNodeRegister() {
log.Println("==== 等待 Node 注册 ====")
for mm.masterRPCService.nodeManager.NodesCount() == 0 {
time.Sleep(time.Second * 10)
}
}
func (the *EtMaster) notifyData(data common_models.IDataTrace, callNodeFunc func(*NodeRpc, common_models.IDataTrace)) {
thingId := data.GetThingId()
isMatch := false
the.nodeMap.Range(func(address, value interface{}) bool {
if nodePtr, ok := value.(*NodeRpc); ok {
if nodePtr != nil {
if contains(nodePtr.args.ThingIds, thingId) {
isMatch = true
go callNodeFunc(nodePtr, data)
return false
// AggDataPublishing 发布聚合数据
func (mm *ETMaster) AggDataPublishing() {
concurrency := configLoad.LoadConfig().GetInt32("performance.master.rpc.concurrency") // 并发请求数 50
mm.initSendStatus(&mm.aggSendStatus, concurrency)
go mm.monitorSendStatus(&mm.aggSendStatus, "aggSendStatus")
mm.startDataPublishing(&mm.aggDataHandlers, "AggData", mm.sendAggData, &mm.aggSendStatus)
}
// RawDataPublishing 发布原始数据
func (mm *ETMaster) RawDataPublishing() {
concurrency := configLoad.LoadConfig().GetInt32("performance.master.rpc.concurrency") // 并发请求数 50
mm.initSendStatus(&mm.rawSendStatus, concurrency)
go mm.monitorSendStatus(&mm.rawSendStatus, "rawSendStatus")
mm.startDataPublishing(&mm.rawDataHandlers, "RawData", mm.sendRawData, &mm.rawSendStatus)
}
// initSendStatus 初始化发送状态
func (mm *ETMaster) initSendStatus(status *SendStatus, threshold int32) {
status.limitThreshold = threshold
atomic.StoreInt32(&status.receiving, 1)
}
// startDataPublishing 启动数据发布
func (mm *ETMaster) startDataPublishing(handlers *sync.Map, handlerType string, sendFunc func(string, []string) error, status *SendStatus) {
// 创建一个 Goroutine 池,最大并发数为 500
pool, err := ants.NewPool(500)
if err != nil {
log.Fatalf("创建 Goroutine 池失败: %v", err)
}
var wg sync.WaitGroup
index := 0
handlers.Range(func(key, value any) bool {
handler := value.(data_source.IMessageHandler)
dataChannel := handler.GetDataChannel()
log.Printf("启动[%s-Publishing]协程,Handler%d,dataChannel[%p] 容量:%d", handlerType, index, dataChannel, cap(dataChannel))
wg.Add(1)
go func(idx int) {
defer wg.Done()
for {
// 检查是否暂停接收
if atomic.LoadInt32(&status.receiving) == 0 {
log.Printf("%sHandler%d: 接收已暂停,等待未完成的消息处理", handlerType, idx)
time.Sleep(100 * time.Millisecond)
continue
}
select {
case d, ok := <-dataChannel: // 检查 dataChannel 是否已关闭
if !ok {
log.Printf("%sHandler%d: dataChannel 已关闭,退出 Goroutine", handlerType, idx)
return // 退出 Goroutine
}
data := d
atomic.AddInt32(&status.inProgressCount, 1)
log.Printf("[%s-Publishing] inProgressCount=%d. Handler%d 预备发送[%d]条数据,dataChannel[%p] 当前长度: %d/%d",
handlerType, atomic.LoadInt32(&status.inProgressCount), idx, len(data.Messages), dataChannel, len(dataChannel), cap(dataChannel))
// 使用 ants 提交任务
poolErr := pool.Submit(func() {
startTime := time.Now()
defer atomic.AddInt32(&status.inProgressCount, -1) // 任务完成后减少计数
if err := sendFunc(data.Id, data.Messages); err != nil {
log.Printf("%sHandler%d: 发送数据失败: %v. 耗时:%v", handlerType, idx, err, time.Since(startTime))
// 将失败数据发送到 Kafka(使用 Goroutine 池)
_ = pool.Submit(func() {
mm.errMessagesKafkaProducer.SendStringArrayMessage(fmt.Sprintf("DLP_%s", handlerType), data.Id, data.Messages)
})
} else {
log.Printf("[%s-Publishing]协程,Handler%d 成功发送[%d]条数据。耗时:%v,dataChannel[%p] 当前长度: %d/%d",
handlerType, idx, len(data.Messages), time.Since(startTime), dataChannel, len(dataChannel), cap(dataChannel))
}
})
if poolErr != nil {
log.Printf("%sHandler%d: 提交任务到 Goroutine 池失败: %v", handlerType, idx, poolErr)
atomic.AddInt32(&status.inProgressCount, -1) // 提交失败时减少计数
}
default:
// 如果 dataChannel 为空,则等待一段时间
time.Sleep(10 * time.Millisecond)
}
}
}
}(index)
index++
return true
})
//无匹配触发 reBalance
if !isMatch {
nodePtr := the.getNodeWithMinThings()
if nodePtr != nil {
nodePtr.args.ThingIds = append(nodePtr.args.ThingIds, thingId)
log.Printf("thingId:[%s]被分配到node:[%s]", thingId, nodePtr.args.Addr)
go callNodeFunc(nodePtr, data)
}
}
wg.Wait()
defer pool.Release() // 确保在函数结束时释放池
}
// callNodeService 调用 etNode 的RPC服务
func (the *EtMaster) callNodeService(node *NodeRpc, data common_models.IDataTrace) {
if node.client == nil {
log.Printf("node [%v] client=nil", node.args)
return
}
var serviceMethod = ""
var resultCH chan int
var v interface{}
switch data.(type) {
case *common_models.IotaData:
v = data.(*common_models.IotaData)
the.exporter.OnIotaData2metricByPrometheus(data.(*common_models.IotaData))
serviceMethod = "etNode.IotaDataHandler"
resultCH = node.resultCH
case *common_models.AggData:
v = data.(*common_models.AggData)
serviceMethod = "etNode.AggDataHandler"
resultCH = node.aggResultCH
default:
log.Printf("Unknown kafka data type:%v", v)
return
}
log.Printf("RPC[%s] node待处理的数据:%+v \n", serviceMethod, v)
func (mm *ETMaster) sendRawData(thingId string, data []string) error {
dataLog := fmt.Sprintf("thingId[%s]共[%d]条数据。", thingId, len(data))
//log.Printf("[RawData-Publishing][sendRawData]1.开始处理。%s", dataLog)
go func() {
defer timeCost(node.args.ID, data.Q(), time.Now())
var reply bool
err := node.client.Call(serviceMethod, data, &reply)
var nodeConn *node_manager.NodeConnection
var err error
retry := 0
result := boolToInt(reply)
// 尝试获取 NodeConnection
for retry < 3 {
startTime := time.Now()
nodeConn, err = mm.nodeManager.GetNodeConnection()
duration := time.Since(startTime) // 计算获取连接的耗时
log.Printf("[sendRawData]1.获取 NodeConnection 耗时: %v", duration)
if err != nil {
isAggParseErr := strings.Contains(err.Error(), "aggData非法数据")
log.Printf("master调用node异常。Error:%s", err.Error())
if !isAggParseErr {
// rpc 调用node, err:read tcp 10.8.30.104:57230->10.8.30.104:40000: wsarecv: An existing connection was forcibly closed by the remote host.
result = 2
}
log.Printf("[sendRawData]1.获取 NodeConnection 失败,错误: %v", err)
//m.kafkaDS.StopConsumers() // TODO 暂停消费 Kafka 消息
//log.Println("============ Kafka 消费已暂停...")
retry++
time.Sleep(time.Duration(2<<retry) * time.Second) // 指数退避
continue
}
resultCH <- result
}()
// RPC调用结果
errorCode := 0
timeoutMills := 300 * 1000 * time.Millisecond // 5分钟
select {
case reply := <-resultCH:
// reply 0=false(RPC访问结果返回false),1=true(RPC访问结果返回true),2访问RPC网络异常
if reply == 2 {
log.Printf("RPC[%s]node连接已被关闭。未处理的数据*** %+v *** \n\n", serviceMethod, v)
errorCode = 200
} else if reply == 0 {
//log.Printf("RPC[%s]node处理后回复false。处理失败的数据*** %+v *** \n\n", serviceMethod, v)
errorCode = 100
}
case <-time.After(timeoutMills):
log.Printf("RPC[%s]node调用超时退出gorutine,timeout:%v。未处理的数据*** %+v *** \n\n", serviceMethod, timeoutMills, v)
errorCode = 300
// TODO 成功获取连接,恢复 Kafka 消费并退出循环
//m.kafkaDS.ResumeConsumers()
//log.Printf("[sendAggData] 成功获取 NodeConnection: %+v", nodeConn)
break
}
// 100 故障:程序内部问题
// 200 故障:网络通信问题
// 300 故障:处理超时
if errorCode >= 200 {
the.errorHandle(errorCode, node.args.Addr, fmt.Sprintf("%s|%s", data.R(), data.T()))
} else {
//log.Printf("node[%s]node处理后回复true。处理成功的数据*** %+v *** \n\n", node.args.Addr, data.R(), data.T())
//log.Printf("RPC[%s]node已处理的数据errorCode=%d *** %+v *** \n\n", serviceMethod, errorCode, v)
log.Printf("****** RPC[%s]node已处理的数据errorCode=%d ****** \n\n", serviceMethod, errorCode)
if err != nil || nodeConn == nil {
log.Printf("[sendRawData]1. 达到最大重试次数,无法获取健康节点连接,错误: %v", err)
return err
}
}
// NodeRegister 是 RPC 服务方法,由 et_node 远程调用
func (the *EtMaster) NodeRegister(nodeArgs *common_models.NodeArgs, reply *bool) error {
node := &NodeRpc{
args: nodeArgs,
resultCH: make(chan int, 1),
aggResultCH: make(chan int, 1),
client: nil,
}
//master 初始化 node client
client, err := rpc.Dial("tcp", nodeArgs.Addr)
if err != nil {
log.Printf("链接node失败-> node[%v]", nodeArgs.Addr)
return err
// 记录调用 Node.ProcessData 的时间
//defer LogProcessDataTimeCost(nodeConn.NArgs.Addr, "[]aggData", time.Now())
// RPC 调用 Node.ProcessData,传递 []*pb.AggData
resultChan := make(chan error, 1)
log.Printf("[sendRawData]2.开始调用 RPC[Node.HandleRawData] %s", dataLog)
callStartTime := time.Now()
callErr := nodeConn.CallHandleIotaData(thingId, data)
log.Printf("<--[sendRawData]3.RPC调用成功。耗时: %v,%s", time.Since(callStartTime), dataLog)
resultChan <- callErr
// 设置超时
select {
case callErr := <-resultChan:
if callErr != nil {
log.Printf("[sendRawData]4.RPC调用结束,错误: %+v,%s", callErr, dataLog)
return callErr
}
//log.Printf("[sendRawData]4.RPC调用成功")
case <-time.After(5 * time.Minute): // 设置超时
log.Printf("[sendRawData]4.请求超过5分钟。%s", dataLog)
return errors.New("请求超时5m")
}
node.client = client
the.addOrUpdate(nodeArgs.Addr, node)
log.Printf("node服务[%v] 注册成功", nodeArgs)
the.printNodes()
*reply = true
return nil
}
func (the *EtMaster) NodeHeart(nodeArgs *common_models.NodeArgs, reply *bool) error {
    if !the.clientIsValid(nodeArgs.Addr) {
        log.Printf("收到-未注册的node[%v] 心跳", nodeArgs)
        *reply = false
        err := the.NodeRegister(nodeArgs, reply)
        if err != nil {
            return errors.New("未注册的node")
        } else {
            *reply = true
            log.Printf("收到未注册的node[%v]心跳,master已将node重新注册。", nodeArgs)
            return nil
        }
    }
    log.Printf("收到-node[%v] 心跳", nodeArgs)
    *reply = true
    return nil
}

func (mm *ETMaster) sendAggData(structId string, data []string) error {
    dataLog := fmt.Sprintf("structId[%s]共[%d]条数据。", structId, len(data))
    //log.Printf("[AggData-Publishing][sendAggData]1.开始处理。%s", dataLog)

    var nodeConn *node_manager.NodeConnection
    var err error
    retry := 0
    for retry < 3 {
        startTime := time.Now()
        nodeConn, err = mm.nodeManager.GetNodeConnection()
        duration := time.Since(startTime) // 计算获取连接的耗时
        log.Printf("[AggData-Publishing][sendAggData]2.获取 NodeConnection 耗时: %v", duration)
        if err != nil {
            log.Printf("[AggData-Publishing][sendAggData]2.1获取 NodeConnection 失败,错误: %v", err)
            //m.kafkaDS.StopConsumers() // TODO 暂停消费 Kafka 消息
            //log.Println("============ Kafka 消费已暂停...")
            retry++
            time.Sleep(time.Duration(2<<retry) * time.Second) // 指数退避
            continue
        }
        // TODO 成功获取连接,恢复 Kafka 消费并退出循环
        //m.kafkaDS.ResumeConsumers()
        //log.Printf("[sendAggData] 成功获取 NodeConnection: %+v", nodeConn)
        break
    }
    if err != nil || nodeConn == nil {
        log.Printf("[AggData-Publishing][sendAggData]2.2 达到最大重试次数,无法获取健康节点连接,错误: %v", err)
        return err
    }

    // 记录调用 Node.ProcessData 的时间
    //defer LogProcessDataTimeCost(nodeConn.NArgs.Addr, "[]aggData", time.Now())
    // RPC 调用 Node.ProcessData,传递 []*pb.AggData
    resultChan := make(chan error, 1)
    log.Printf("[AggData-Publishing][sendAggData]3.开始调用 RPC[Node.HandleAggData] %s", dataLog)
    callStartTime := time.Now()
    callErr := nodeConn.CallHandleAggData(structId, data)
    log.Printf("[AggData-Publishing][sendAggData]4.RPC调用耗时: %v,%s", time.Since(callStartTime), dataLog)
    resultChan <- callErr

    select {
    case callErr := <-resultChan:
        if callErr != nil {
            log.Printf("[AggData-Publishing][sendAggData]4.RPC调用结束,错误: %+v,%s", callErr, dataLog)
            return callErr
        }
        //log.Printf("[sendAggData]4.RPC调用成功")
    case <-time.After(5 * time.Minute): // 设置超时
        log.Printf("[AggData-Publishing][sendAggData]请求超过5分钟。%s", dataLog)
        return errors.New("请求超时5m")
    }
    return nil
}

// NodeUnRegister 节点RPC 注销
func (the *EtMaster) NodeUnRegister(nodeArgs *common_models.NodeArgs, reply *bool) error {
    value, ok := the.nodeMap.Load(nodeArgs.Addr)
    node := value.(*NodeRpc)
    if ok && node.client != nil {
        err := node.client.Close()
        if err != nil {
            log.Printf("节点[%s] client关闭异常 %s", nodeArgs.Addr, err.Error())
        }
        the.nodeMap.Delete(nodeArgs.Addr)
    }
    log.Printf("node服务[%v] 注销成功", nodeArgs)
    *reply = true
    return nil
}
func (the *EtMaster) WaitNodeRegister() {
    log.Println("等待 node进行注册")
    for {
        if the.nodeMapCount() > 0 {
            break
        }
        time.Sleep(time.Second * 10)
    }
}

// monitorSendStatus 监控发送状态
func (mm *ETMaster) monitorSendStatus(status *SendStatus, statusName string) {
    for {
        inProgressCount := atomic.LoadInt32(&status.inProgressCount)
        if inProgressCount > status.limitThreshold {
            atomic.StoreInt32(&status.receiving, 0)
            log.Printf("[%s] 未完成消息数量超过阈值,暂停接收新的消息。%+v\n", statusName, status)
        } else {
            atomic.StoreInt32(&status.receiving, 1)
        }
        time.Sleep(500 * time.Millisecond)
    }
}
func (the *EtMaster) ConnectNode() {
    the.nodeMap.Range(func(key, value interface{}) bool {
        node := value.(*NodeRpc)
        nodeAddr := key.(string)
        if node.client == nil {
            client, err := rpc.Dial("tcp", nodeAddr)
            if err != nil {
                log.Printf("链接node失败-> node[%v]", nodeAddr)
                return true
            }
            node.client = client
            the.nodeMap.Store(nodeAddr, node)
        }
        return true
    })
}

// MonitorShutdown 监控退出信号
func (mm *ETMaster) MonitorShutdown() {
    sigChan := make(chan os.Signal, 1)
    signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

    sig := <-sigChan
    log.Printf("************ 接收到信号: %s,正在关闭服务器...", sig)

    mm.closeDataHandlers(&mm.rawDataHandlers, "DLP_DATA_RAW")
    mm.closeDataHandlers(&mm.aggDataHandlers, "DLP_DATA_AGG")

    mm.errMessagesKafkaProducer.Close()
    mm.grpcServer.GracefulStop()
    log.Println("************ 服务器已成功关闭")
}
func (the *EtMaster) addOrUpdate(key string, newNode *NodeRpc) {
if val, ok := the.nodeMap.Load(key); ok {
hisNode := val.(*NodeRpc)
hisNode.client = newNode.client
the.nodeMap.Store(key, hisNode)
} else {
the.nodeMap.Store(key, newNode)
}
}
func (the *EtMaster) nodeMapCount() int {
count := 0
the.nodeMap.Range(func(key, value interface{}) bool {
count++
return true
})
return count
}
func (the *EtMaster) clientIsValid(address string) bool {
    val, ok := the.nodeMap.Load(address)
    if !ok {
        return false
    }
    if val.(*NodeRpc).client == nil {
        return false
    }
    return true
}

// 获取最少things的节点
func (the *EtMaster) getNodeWithMinThings() *NodeRpc {
    var minNode *NodeRpc
    minThings := math.MaxInt64 // 初始化为最大值
    the.nodeMap.Range(func(key, value interface{}) bool {
        node := value.(*NodeRpc)
        if len(node.args.ThingIds) < minThings {
            minThings = len(node.args.ThingIds)
            minNode = node
        }
        return true
    })
    return minNode
}

// closeDataHandlers 关闭数据处理器
func (mm *ETMaster) closeDataHandlers(handlers *sync.Map, dlpTopic string) {
    handlers.Range(func(key, value any) bool {
        handler := value.(data_source.IMessageHandler)
        ch := handler.GetDataChannel()
        close(ch)
        for data := range ch {
            mm.errMessagesKafkaProducer.SendStringArrayMessage(dlpTopic, data.Id, data.Messages)
        }
        return true
    })
}
func (the *EtMaster) printNodes() {
count := 0
info := ""
the.nodeMap.Range(func(key, value interface{}) bool {
count++
node := value.(*NodeRpc)
info += fmt.Sprintf("%s,%s\n", node.args.ID, node.args.Addr)
return true
})
countInfo := fmt.Sprintf("共[%d]个节点:\n ", count)
log.Printf("%s %s\n", countInfo, info)
}
func (the *EtMaster) errorHandle(errCode int, address string, dataDesc string) {
val, ok := the.nodeMap.Load(address)
if !ok {
log.Printf("【errorHandle】Error:不存在的node[%s]\n", address)
return
}
node := val.(*NodeRpc)
//发送 stop 信号
the.sleepCH <- true
log.Println("=============================================")
// 100 故障:程序内部错误
// 200 故障:网络通信问题
// 300 故障:处理超时
if errCode == 200 {
log.Printf("node[%v]连接已中断,休眠5秒后,将删除该节点。消息:%s", node.args.Addr, dataDesc)
time.Sleep(time.Second * 5)
the.nodeMap.Delete(address)
} else if errCode == 300 {
log.Printf("node[%s]处理超时,将休眠5秒后,将删除该节点。消息:%s", address, dataDesc)
time.Sleep(time.Second * 5)
the.nodeMap.Delete(address)
}
the.printNodes()
}
func contains(arr []string, target string) bool {
for _, value := range arr {
if value == target {
return true
}
}
return false
}
func timeCost(nodeId, deviceId string, start time.Time) {
tc := time.Since(start)
log.Printf("master调用node[%s],处理[%s]耗时%v", nodeId, deviceId, tc)
}
func boolToInt(b bool) int {
if b {
return 1
}
return 0
}

137
master/app/master_rpc_service.go

@ -0,0 +1,137 @@
package app
import (
"context"
"et_rpc"
"et_rpc/pb"
"fmt"
"log"
"master/node_manager"
)
// 实现 gRPC 服务接口
type MasterRPCService struct {
pb.UnimplementedMasterServiceServer
nodeManager *node_manager.NodeManager // 用于存储节点信息
}
func NewMasterRPCService(nodeManager *node_manager.NodeManager) *MasterRPCService {
return &MasterRPCService{
nodeManager: nodeManager,
}
}
// 实现 RegisterNode 方法
func (s *MasterRPCService) RegisterNode(ctx context.Context, req *pb.NodeRequest) (*pb.RpcResponse, error) {
// 创建响应对象
response := &pb.RpcResponse{
Status: pb.RpcResponse_SUCCESS,
ErrorMessage: "",
}
// 检查请求是否有效
if req == nil {
return s.createErrorResponse(pb.RpcResponse_INVALID_ARGUMENT, "节点注册失败:req 为 nil")
}
// 添加节点
nodeArgs := &et_rpc.NodeArgs{
ID: req.Id,
Addr: req.Address,
Load: 0,
ResourceJson: "",
Weight: 0,
Status: 0,
ErrCode: 0,
ErrMessage: "",
}
if err := s.nodeManager.AddNode(nodeArgs); err != nil {
msg := fmt.Sprintf("[%s]节点注册失败:%s", nodeArgs.Addr, err.Error())
return s.createErrorResponse(pb.RpcResponse_NOT_FOUND, msg)
}
response.Status = pb.RpcResponse_SUCCESS
response.ErrorMessage = fmt.Sprintf("[%s]节点注册成功!!", req.Address)
log.Printf(response.ErrorMessage)
return response, nil
}
// 实现 HeartbeatNode 方法
func (s *MasterRPCService) HeartbeatNode(ctx context.Context, req *pb.NodeRequest) (*pb.RpcResponse, error) {
// 创建响应对象
response := &pb.RpcResponse{
Status: pb.RpcResponse_SUCCESS,
ErrorMessage: "",
}
// 检查请求是否有效
if req == nil {
return s.createErrorResponse(pb.RpcResponse_INVALID_ARGUMENT, "请求无效: req 为 nil")
}
// 尝试更新节点状态
if !s.nodeManager.NodeExists(req.Address) {
msg := fmt.Sprintf("未注册的节点: %s", req.Address)
return s.createErrorResponse(pb.RpcResponse_NOT_FOUND, msg)
}
log.Printf("收到 Node[%s] 心跳", req.Address)
response.Status = pb.RpcResponse_SUCCESS
return response, nil
}
// 实现 UnregisterNode 方法
func (s *MasterRPCService) UnregisterNode(ctx context.Context, req *pb.NodeRequest) (*pb.RpcResponse, error) {
// 创建响应对象
response := &pb.RpcResponse{
Status: pb.RpcResponse_SUCCESS,
ErrorMessage: "",
}
// 检查请求是否有效
if req == nil {
return s.createErrorResponse(pb.RpcResponse_INVALID_ARGUMENT, "请求无效: req 为 nil")
}
// 尝试更新节点状态
if !s.nodeManager.RemoveNode(req.Address) {
log.Printf("删除节点Node[%s],节点不存在", req.Address)
//return s.createErrorResponse(pb.RpcResponse_NOT_FOUND, msg)
}
log.Printf("节点Node[%s]已删除", req.Address)
response.Status = pb.RpcResponse_SUCCESS
return response, nil
}
// 实现 CheckMasterStatus 方法
func (s *MasterRPCService) CheckMasterStatus(ctx context.Context, req *pb.NodeRequest) (*pb.RpcResponse, error) {
// 创建响应对象
response := &pb.RpcResponse{
Status: pb.RpcResponse_SUCCESS,
ErrorMessage: "",
}
// 检查请求是否有效
if req == nil {
return s.createErrorResponse(pb.RpcResponse_INVALID_ARGUMENT, "请求无效: req 为 nil")
}
// 记录主节点状态检查信息
log.Printf("主节点状态被节点检查: ID=%s", req.Address)
return response, nil
}
// mustEmbedUnimplementedMasterServiceServer 是一个占位符方法
func (s *MasterRPCService) mustEmbedUnimplementedMasterServiceServer() {}
// createErrorResponse 用于创建错误响应
func (s *MasterRPCService) createErrorResponse(status pb.RpcResponse_Status, message string) (*pb.RpcResponse, error) {
response := &pb.RpcResponse{
Status: status,
ErrorMessage: message,
}
log.Printf(message) // 记录错误信息
return response, fmt.Errorf(message)
}
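
The four RPC methods above cover the node lifecycle: register, heartbeat, unregister, and a master status probe. Below is a minimal, hypothetical sketch of how a node-side client could call RegisterNode through the generated gRPC stub; the master address, node ID, node address, and timeout are placeholder values, and pb.NewMasterServiceClient is assumed to be the client constructor that protoc emits for MasterService.

package main

import (
    "context"
    "log"
    "time"

    "et_rpc/pb"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

func main() {
    // 示例:node 侧向 master 注册(地址、ID 均为占位值)
    conn, err := grpc.NewClient("127.0.0.1:50000", grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatalf("连接 master 失败: %v", err)
    }
    defer conn.Close()

    client := pb.NewMasterServiceClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    resp, err := client.RegisterNode(ctx, &pb.NodeRequest{Id: "node-1", Address: "127.0.0.1:40000"})
    if err != nil || resp.Status != pb.RpcResponse_SUCCESS {
        log.Printf("注册失败: %v, resp=%+v", err, resp)
        return
    }
    log.Printf("注册成功: %s", resp.ErrorMessage)
}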

36
master/data_source/data_agg_handler.go

@ -0,0 +1,36 @@
package data_source
import (
"log"
"time"
)
type AggDataHandler struct {
key string
topic string
partitionID int
dataChannel chan *RPCPayload // 用于发送打包后的数据
}
func NewAggDataHandler(key, topic string, partitionID int) *AggDataHandler {
handler := &AggDataHandler{
key: key,
topic: topic,
partitionID: partitionID,
dataChannel: make(chan *RPCPayload, 10),
}
return handler
}
func (h *AggDataHandler) HandleMessage(structId string, values []string) bool {
h.dataChannel <- &RPCPayload{Id: structId, Messages: values}
log.Printf("****** AggDataHandler.HandleMessage() ,h.dataChannel【%p】通道数据量:%d/%d", h.dataChannel, len(h.dataChannel), cap(h.dataChannel))
time.Sleep(50 * time.Millisecond)
return true
}
// GetDataChannel 返回 dataChannel
func (h *AggDataHandler) GetDataChannel() chan *RPCPayload {
return h.dataChannel
}

4
dataSource/kafka/aggData_test.go → master/data_source/data_agg_handler_test.go

@ -1,4 +1,4 @@
package kafka
package data_source
import (
"encoding/json"
@ -13,7 +13,7 @@ func TestAggDataHandler_HandleMessage(t *testing.T) {
aggDataMsg := `
{"date":"2024-09-19T09:39:59.999+0800","sensorId":106,"structId":1,"factorId":11,"aggTypeId":2006,"aggMethodId":3004,"agg":{"strain":-19.399999618530273},"changed":{"strain":-3}}
`
h.HandleMessage(aggDataMsg)
h.HandleMessage("1", []string{aggDataMsg})
}
func TestFormatTime(t *testing.T) {

49
master/data_source/data_raw_handler.go

@ -0,0 +1,49 @@
package data_source
import (
"log"
"time"
)
// IMessageHandler 是 kafka 消息处理者接口
type IMessageHandler interface {
HandleMessage(key string, values []string) bool
GetDataChannel() chan *RPCPayload
}
type RPCPayload struct {
Id string
Messages []string
}
type RawDataHandler struct {
key string
topic string
partitionID int
dataChannel chan *RPCPayload
}
// 创建一个新的 RawDataHandler 实例
func NewRawDataHandler(key, topic string, partitionID int) *RawDataHandler {
handler := &RawDataHandler{
key: key,
topic: topic,
partitionID: partitionID,
dataChannel: make(chan *RPCPayload, 10),
}
return handler
}
// 在 kafka_dataSource.go 的 Producer() 中被使用
func (h *RawDataHandler) HandleMessage(thingId string, values []string) bool {
h.dataChannel <- &RPCPayload{Id: thingId, Messages: values}
log.Printf("--> RawDataHandler%d ,h.dataChannel【%p】通道数据量: %d/%d", h.partitionID, h.dataChannel, len(h.dataChannel), cap(h.dataChannel))
time.Sleep(50 * time.Millisecond)
return true
}
// GetDataChannel 返回 dataChannel
func (h *RawDataHandler) GetDataChannel() chan *RPCPayload {
return h.dataChannel
}
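
HandleMessage only enqueues a batch and applies light backpressure; a separate consumer is expected to drain GetDataChannel() (in the master this is done by the publishing goroutines). A minimal sketch of that produce/consume pair follows; the key, topic, and payload are placeholder values, and the import path is assumed from the repository layout.

package main

import (
    "log"

    "master/data_source" // 导入路径按仓库布局假设
)

func main() {
    handler := data_source.NewRawDataHandler("data_raw_0", "data_raw", 0)

    // 生产一批消息(HandleMessage 只负责入队)
    handler.HandleMessage("thing-001", []string{`{"raw":1}`})

    // 消费 dataChannel;master 侧由发布协程持续做同样的事
    payload := <-handler.GetDataChannel()
    log.Printf("thingId=%s,%d 条原始数据", payload.Id, len(payload.Messages))
}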

317
master/data_source/kafka_consumerGroup_aggHandler.go

@ -0,0 +1,317 @@
package data_source
import (
"context"
"encoding/json"
"fmt"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"github.com/IBM/sarama"
"github.com/panjf2000/ants/v2"
"golang.org/x/time/rate"
"log"
"sync"
"time"
)
type AggConsumerGroupHandler struct {
kafkaConfig KafkaConfig
topicHandlers sync.Map // 主题处理方法
kafkaPaused bool //是否处于暂停数据接收状态
ControlChan chan string // 控制信号通道
mu sync.RWMutex
}
func NewAggConsumerGroupHandler(kafkaConfig KafkaConfig) *AggConsumerGroupHandler {
return &AggConsumerGroupHandler{
kafkaConfig: kafkaConfig,
ControlChan: make(chan string),
}
}
func (h *AggConsumerGroupHandler) ConnectConsumerGroup() {
log.Println("AggData kafka init...")
vp := configLoad.LoadConfig()
minFetch := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.minFetch")
maxFetch := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.maxFetch")
maxWaitTime := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.maxWaitTime")
// 消费者配置信息
config := sarama.NewConfig()
config.Consumer.Return.Errors = false // 不返回消费过程中的错误
config.Version = sarama.V2_0_0_0
config.Consumer.Offsets.Initial = sarama.OffsetOldest // 从最旧的消息开始消费
config.Consumer.Offsets.AutoCommit.Enable = true // 启动自动提交偏移量
config.Consumer.Offsets.AutoCommit.Interval = 1000 * time.Millisecond
config.Consumer.Fetch.Min = minFetch // 最小拉取 10 KB
config.Consumer.Fetch.Max = maxFetch // 最大拉取 5 MB
config.Consumer.MaxWaitTime = time.Duration(maxWaitTime) * time.Millisecond // 最大等待时间 ms
config.Consumer.Retry.Backoff = 10000 * time.Millisecond // 消费失败后重试的延迟时间
//config.Consumer.Retry.BackoffFunc = func(retries int) time.Duration {}
config.Consumer.Group.Session.Timeout = 60000 * time.Millisecond // 消费者组会话超时,默认10s,此处设为60s
config.Consumer.Group.Heartbeat.Interval = 6000 * time.Millisecond // Heartbeat 这个值必须小于 session.timeout.ms ,一般小于 session.timeout.ms/3,默认是3s
config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRoundRobin()} // 设置消费者组的负载均衡策略为轮询策略
// 创建消费者组
client, err := sarama.NewConsumerGroup(h.kafkaConfig.Brokers, h.kafkaConfig.GroupID, config)
if err != nil {
panic(err)
}
defer func() {
_ = client.Close()
}()
// 启动错误处理协程
go func() {
for err := range client.Errors() {
log.Println("消费错误:", err)
}
}()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// 接收控制信号
go func() {
for {
select {
case signal := <-h.ControlChan:
switch signal {
case "stop":
log.Printf("[Agg-ConsumerGroup-%d] 收到停止信号,将停止消费.", h.kafkaConfig.ClientID)
h.kafkaPaused = true
case "resume":
log.Printf("[Agg-ConsumerGroup-%d] 收到恢复信号,将恢复消费.", h.kafkaConfig.ClientID)
h.kafkaPaused = false
}
}
}
}()
log.Printf("[Agg-ConsumerGroup-%d] 准备启动 Kafka 消费者协程。订阅的主题: %v", h.kafkaConfig.ClientID, h.kafkaConfig.Topic)
// 创建消费者实例
consumerInstance := h
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for {
topics := []string{h.kafkaConfig.Topic}
err1 := client.Consume(ctx, topics, consumerInstance)
if err1 != nil {
log.Printf("[Agg-ConsumerGroup-%d] 订阅主题[%v]异常。%s", h.kafkaConfig.ClientID, h.kafkaConfig.Topic, err1.Error())
return
}
if ctx.Err() != nil {
log.Println(ctx.Err())
return
}
}
}()
log.Println("AggData Sarama consumer up and running ...")
wg.Wait()
}
func (h *AggConsumerGroupHandler) Setup(session sarama.ConsumerGroupSession) error {
// 在此执行任何必要的设置任务。
log.Printf("data_agg消费者组会话开始,%+v", session.Claims())
return nil
}
func (h *AggConsumerGroupHandler) Cleanup(session sarama.ConsumerGroupSession) error {
// 在此执行任何必要的清理任务。
log.Println("data_agg消费者组会话结束,", session.Claims())
return nil
}
func (h *AggConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
log.Printf("data_agg 处理消费者组会话,%+v. MemberID: %v, Topic: %v, Partition: %v \n", session.Claims(), session.MemberID(), claim.Topic(), claim.Partition())
topic := claim.Topic()
isDeadLetterQueue := false // 是否为死信队列消息
if len(topic) > 4 && topic[:4] == "DLP_" {
isDeadLetterQueue = true
}
if isDeadLetterQueue {
return h.DLPConsumeClaim(session, claim)
} else {
return h.BatchConsumeClaim(session, claim)
}
}
func (h *AggConsumerGroupHandler) BatchConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
maxBatchSize := configLoad.LoadConfig().GetInt("performance.master.kafkaConsumer.data_agg.maxBatchSize")
messageChannel := make(chan map[string][]*sarama.ConsumerMessage, 50)
topicHandlerKey := fmt.Sprintf("%s_%d", claim.Topic(), claim.Partition())
msgHandler, isValid := h.topicHandlers.Load(topicHandlerKey)
if !isValid {
log.Printf("[Agg-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
return fmt.Errorf("[Agg-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
}
// 使用 sync.Pool 复用 map 对象
messageMapPool := sync.Pool{
New: func() interface{} {
return make(map[string][]*sarama.ConsumerMessage)
},
}
// 启动一个 goroutine 来处理消息
var wg sync.WaitGroup
pool, _ := ants.NewPool(100)
defer pool.Release()
//创建一个速率限制器,它允许每 10 毫秒执行一次操作,且在这个时间窗口内最多只能执行一次。
var aggRateLimiter = rate.NewLimiter(rate.Every(10*time.Millisecond), 1)
go func() {
for structMessages := range messageChannel {
_ = aggRateLimiter.Wait(context.Background()) // 控制消费速率
wg.Add(len(structMessages))
for k, v := range structMessages {
key := k
msgs := v
_ = pool.Submit(func() {
defer wg.Done()
defer messageMapPool.Put(structMessages)
if len(msgs) == 0 {
return
}
values := make([]string, len(msgs))
for i, msg := range msgs {
values[i] = string(msg.Value)
}
// 处理消息并检查是否成功
isProcessed := msgHandler.(func(structId string, values []string) bool)(key, values) //msgHandler(key, values)
if !isProcessed {
log.Printf("[Agg-ConsumerGroup] 消息处理失败,键: %s,消息: %v", key, msgs)
} else {
// 处理成功后,消息标记为已处理
for _, msg := range msgs {
session.MarkMessage(msg, "is handled")
}
}
})
}
}
}()
batchBuffer := make(map[string][]*sarama.ConsumerMessage) // 按键分类的批次缓冲
currentBatchSize := make(map[string]int) // 记录每个 key 的当前批次大小
// 定义一个定时器,用于处理残余消息
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
ticker := time.NewTicker(20 * time.Second)
defer ticker.Stop()
go func() {
for {
select {
case <-ticker.C:
// 处理剩余的消息
for structId, msgs := range batchBuffer {
if len(msgs) > 0 {
// 从池中获取 map 对象
msgMap := messageMapPool.Get().(map[string][]*sarama.ConsumerMessage)
msgMap[structId] = msgs
messageChannel <- msgMap
delete(batchBuffer, structId) // 清除已处理的键
delete(currentBatchSize, structId)
}
}
case <-ctx.Done():
return // 退出 Goroutine
}
}
}()
// 读消息
for msg := range claim.Messages() {
structId := string(msg.Key)
if structId == "" {
structId = "structId-null"
}
// 将消息添加到批次缓冲
batchBuffer[structId] = append(batchBuffer[structId], msg)
// 计算当前 key 的消息大小
currentBatchSize[structId] += len(msg.Value)
// 如果当前批次达到 maxBatchSize,发送到通道并重置
if currentBatchSize[structId] >= maxBatchSize {
// 从池中获取 map 对象
msgMap := messageMapPool.Get().(map[string][]*sarama.ConsumerMessage)
msgMap[structId] = batchBuffer[structId]
messageChannel <- msgMap
delete(batchBuffer, structId) // 清除已处理的键
delete(currentBatchSize, structId) // 清除已处理的大小
}
}
close(messageChannel)
wg.Wait()
return nil
}
func (h *AggConsumerGroupHandler) DLPConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
topicHandlerKey := "DLP_DATA_AGG"
msgHandler, isValid := h.topicHandlers.Load(topicHandlerKey)
if !isValid {
log.Printf("[DLP-Agg-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
return fmt.Errorf("[DLP-Agg-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
}
for msg := range claim.Messages() {
structId := string(msg.Key)
if structId == "" {
structId = "structId-null"
}
// 解析 value
var value []string
err := json.Unmarshal(msg.Value, &value)
if err != nil {
log.Printf("[DLP_Agg-ConsumerGroup]Failed to unmarshal value: %v", err)
continue
}
isProcessed := msgHandler.(func(structId string, values []string) bool)(structId, value)
if !isProcessed {
log.Printf("[DLP_Agg-ConsumerGroup]消息处理失败,structId: %s,消息: %v", structId, value)
} else {
// 处理成功后,消息标记为已处理
session.MarkMessage(msg, "is handled")
}
}
return nil
}
func (h *AggConsumerGroupHandler) SetTopicHandler(topicHandlerKey string, fun func(structId string, values []string) bool) {
h.mu.Lock()
defer h.mu.Unlock()
h.topicHandlers.Store(topicHandlerKey, fun)
}
// 消息消费启动控制
func (h *AggConsumerGroupHandler) SetKafkaPaused(paused bool) {
h.kafkaPaused = paused
if paused {
log.Println("Kafka消息消费 已暂停.")
} else {
log.Println("Kafka消息消费 已恢复.")
}
}
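
ConsumeClaim dispatches each batch to a processing function registered under a "topic_partition" key (plus the DLP_DATA_AGG key), so the handlers must be registered before ConnectConsumerGroup is called. A small wiring sketch under those assumptions; broker, group, topic name, and partition count are placeholder values, and the real deployment reads them from configuration.

package main

import (
    "fmt"
    "log"

    "master/data_source" // 导入路径按仓库布局假设
)

func main() {
    h := data_source.NewAggConsumerGroupHandler(data_source.KafkaConfig{
        Brokers:    []string{"127.0.0.1:9092"},
        GroupID:    "et-master",
        Topic:      "data_agg",
        Partitions: 3,
        ClientID:   1,
    })

    // 为每个分区注册处理函数;返回 true 时消息才会被标记为已处理
    for p := 0; p < 3; p++ {
        key := fmt.Sprintf("data_agg_%d", p)
        h.SetTopicHandler(key, func(structId string, values []string) bool {
            log.Printf("structId=%s,%d 条聚集数据", structId, len(values))
            return true
        })
    }

    h.ConnectConsumerGroup() // 阻塞运行,直至消费出错或进程退出
}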

329
master/data_source/kafka_consumerGroup_iotaHandler.go

@ -0,0 +1,329 @@
package data_source
import (
"context"
"encoding/json"
"fmt"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"github.com/IBM/sarama"
"github.com/panjf2000/ants/v2"
"golang.org/x/time/rate"
"log"
"sync"
"time"
)
type RawConsumerGroupHandler struct {
kafkaConfig KafkaConfig
topicHandlers sync.Map // 分区主题处理方法
kafkaPaused bool //是否处于暂停数据接收状态
ControlChan chan string // 控制信号通道
mu sync.RWMutex
}
func NewRawConsumerGroupHandler(kafkaConfig KafkaConfig) *RawConsumerGroupHandler {
return &RawConsumerGroupHandler{
kafkaConfig: kafkaConfig,
ControlChan: make(chan string),
}
}
func (h *RawConsumerGroupHandler) ConnectConsumerGroup() {
log.Println("RawData kafka init...")
vp := configLoad.LoadConfig()
minFetch := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.minFetch")
maxFetch := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.maxFetch")
maxWaitTime := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.maxWaitTime")
// 消费者配置信息
config := sarama.NewConfig()
config.Consumer.Return.Errors = false // 不返回消费过程中的错误
config.Version = sarama.V2_0_0_0
config.Consumer.Offsets.Initial = sarama.OffsetOldest // 从最旧的消息开始消费
config.Consumer.Offsets.AutoCommit.Enable = true // 启动自动提交偏移量
config.Consumer.Offsets.AutoCommit.Interval = 1000 * time.Millisecond
config.Consumer.Fetch.Min = minFetch // 最小拉取 10 KB
config.Consumer.Fetch.Max = maxFetch // 最大拉取 5 MB
config.Consumer.MaxWaitTime = time.Duration(maxWaitTime) * time.Millisecond // 最大等待时间 ms
config.Consumer.Retry.Backoff = 10000 * time.Millisecond // 消费失败后重试的延迟时间
//config.Consumer.Retry.BackoffFunc = func(retries int) time.Duration {}
config.Consumer.Group.Session.Timeout = 60000 * time.Millisecond // 消费者组会话超时,默认10s,此处设为60s
config.Consumer.Group.Heartbeat.Interval = 6000 * time.Millisecond // Heartbeat 这个值必须小于 session.timeout.ms ,一般小于 session.timeout.ms/3,默认是3s
config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRoundRobin()} // 设置消费者组的负载均衡策略为轮询策略
// 创建消费者组
client, err := sarama.NewConsumerGroup(h.kafkaConfig.Brokers, h.kafkaConfig.GroupID, config)
if err != nil {
panic(err)
}
defer func() {
_ = client.Close()
}()
// 启动错误处理协程
go func() {
for err := range client.Errors() {
log.Println("消费错误:", err)
}
}()
// 接收控制信号
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
for {
select {
case signal := <-h.ControlChan:
switch signal {
case "stop":
log.Printf("[Raw-ConsumerGroup-%d] 收到停止信号,将停止消费.", h.kafkaConfig.ClientID)
h.kafkaPaused = true
case "resume":
log.Printf("[Raw-ConsumerGroup-%d] 收到恢复信号,将恢复消费.", h.kafkaConfig.ClientID)
h.kafkaPaused = false
}
case <-ctx.Done():
return
}
}
}()
log.Printf("[Raw-ConsumerGroup-%d] 准备启动 Kafka 消费者协程。订阅的主题: %v", h.kafkaConfig.ClientID, h.kafkaConfig.Topic)
// 创建消费者实例
consumerInstance := h
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for {
// 消费 Kafka 消息
topics := []string{h.kafkaConfig.Topic}
err1 := client.Consume(ctx, topics, consumerInstance) // 加入消费者组,并订阅指定主题。Kafka会为每个消费者分配相应的分区。
if err1 != nil {
log.Printf("[Raw-ConsumerGroup-%d] 订阅主题[%v]异常。%s", h.kafkaConfig.ClientID, h.kafkaConfig.Topic, err1.Error())
return
}
if ctx.Err() != nil {
log.Println(ctx.Err())
return
}
}
}()
log.Println("RawData Sarama consumer up and running ...")
wg.Wait()
}
// Setup 在新会话开始时运行
func (h *RawConsumerGroupHandler) Setup(session sarama.ConsumerGroupSession) error {
// 在此执行任何必要的设置任务。
log.Printf("data_raw消费者组会话开始,%+v \n", session.Claims())
return nil
}
// Cleanup 在会话结束时运行
func (h *RawConsumerGroupHandler) Cleanup(session sarama.ConsumerGroupSession) error {
// 在此执行任何必要的清理任务。
log.Println("data_raw消费者组会话结束,", session.Claims())
return nil
}
// ConsumeClaim 启动消费者循环
func (h *RawConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
log.Printf("data_raw 处理消费者组会话,%+v, MemberID: %v, Topic: %v, Partition: %v \n", session.Claims(), session.MemberID(), claim.Topic(), claim.Partition())
topic := claim.Topic()
isDeadLetterQueue := false // 是否为死信队列消息
if len(topic) > 4 && topic[:4] == "DLP_" {
isDeadLetterQueue = true
}
if isDeadLetterQueue {
return h.DLPConsumeClaim(session, claim)
} else {
return h.BatchConsumeClaim(session, claim)
}
}
func (h *RawConsumerGroupHandler) BatchConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
//const maxBatchSize = 50 * 1024 // TODO 设置每个批次的最大字节数,例如 50kB
maxBatchSize := configLoad.LoadConfig().GetInt("performance.master.kafkaConsumer.data_raw.maxBatchSize")
messageChannel := make(chan map[string][]*sarama.ConsumerMessage, 100)
topicHandlerKey := fmt.Sprintf("%s_%d", claim.Topic(), claim.Partition())
msgHandler, isValid := h.topicHandlers.Load(topicHandlerKey)
if !isValid {
log.Printf("[Raw-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
return fmt.Errorf("[Raw-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
}
messageMapPool := sync.Pool{
New: func() interface{} {
return make(map[string][]*sarama.ConsumerMessage)
},
}
// 启动一个 goroutine 来处理消息
var wg sync.WaitGroup
pool, _ := ants.NewPool(100)
defer pool.Release()
var rawRateLimiter = rate.NewLimiter(rate.Every(10*time.Millisecond), 1)
go func() {
for thingMessages := range messageChannel {
_ = rawRateLimiter.Wait(context.Background()) // 控制消费速率
wg.Add(len(thingMessages))
for k, v := range thingMessages {
key := k
msgs := v
_ = pool.Submit(func() {
defer wg.Done()
defer messageMapPool.Put(thingMessages) // 归还到池中
if len(msgs) == 0 {
return
}
values := make([]string, len(msgs))
for i, msg := range msgs {
values[i] = string(msg.Value)
}
// 处理消息并检查是否成功
isProcessed := msgHandler.(func(thingId string, values []string) bool)(key, values) //msgHandler(key, values)
if !isProcessed {
log.Printf("[Raw-ConsumerGroup] 消息处理失败,键: %s,消息: %v", key, msgs)
} else {
// 处理成功后,消息标记为已处理
for _, msg := range msgs {
session.MarkMessage(msg, "is handled")
}
}
})
}
}
}()
batchBuffer := make(map[string][]*sarama.ConsumerMessage) // 按 thingId 分类的批次缓冲
currentBatchSize := make(map[string]int) // 记录每个 thingId 的当前批次大小
// 定义一个定时器,用于处理剩余消息
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
ticker := time.NewTicker(20 * time.Second)
defer ticker.Stop()
go func() {
for {
select {
case <-ticker.C:
// 处理剩余的消息
for thingId, msgs := range batchBuffer {
if len(msgs) > 0 {
msgMap := messageMapPool.Get().(map[string][]*sarama.ConsumerMessage)
msgMap[thingId] = msgs
messageChannel <- msgMap
delete(batchBuffer, thingId)
delete(currentBatchSize, thingId) // 统一清理
}
}
case <-ctx.Done():
return // 退出 Goroutine
}
}
}()
// 读消息
for msg := range claim.Messages() {
thingId := string(msg.Key)
if thingId == "" {
thingId = "thingId-null"
}
// 将消息添加到批次缓冲
batchBuffer[thingId] = append(batchBuffer[thingId], msg)
// 计算当前 key 的消息大小
currentBatchSize[thingId] += len(msg.Value)
// 如果当前批次达到 maxBatchSize,发送到通道并重置
if currentBatchSize[thingId] >= maxBatchSize {
// 从池中获取 map 对象
thingMessages := messageMapPool.Get().(map[string][]*sarama.ConsumerMessage)
thingMessages[thingId] = batchBuffer[thingId]
messageChannel <- thingMessages
delete(batchBuffer, thingId) // 清除已处理的键
delete(currentBatchSize, thingId) // 清除已处理的大小
}
}
close(messageChannel)
wg.Wait()
return nil
}
func (h *RawConsumerGroupHandler) DLPConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
topicHandlerKey := "DLP_DATA_RAW"
msgHandler, isValid := h.topicHandlers.Load(topicHandlerKey)
if !isValid {
log.Printf("[DLP-Raw-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
return fmt.Errorf("[DLP-Raw-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
}
for msg := range claim.Messages() {
thingId := string(msg.Key)
if thingId == "" {
thingId = "thingId-null"
}
// 解析 value
var value []string
err := json.Unmarshal(msg.Value, &value)
if err != nil {
log.Printf("[DLP_Raw-ConsumerGroup]Failed to unmarshal value: %v", err)
continue
}
isProcessed := msgHandler.(func(thingId string, values []string) bool)(thingId, value)
if !isProcessed {
log.Printf("[DLP_Raw-ConsumerGroup]消息处理失败,thingId: %s,消息: %v", thingId, value)
} else {
// 处理成功后,消息标记为已处理
session.MarkMessage(msg, "is handled")
}
}
return nil
}
func (h *RawConsumerGroupHandler) SetTopicHandler(topicHandlerKey string, fun func(thingId string, values []string) bool) {
h.mu.Lock()
defer h.mu.Unlock()
h.topicHandlers.Store(topicHandlerKey, fun)
}
// 消息消费启动控制
func (h *RawConsumerGroupHandler) SetKafkaPaused(paused bool) {
h.kafkaPaused = paused
if paused {
log.Println("Kafka消息消费 已暂停.")
} else {
log.Println("Kafka消息消费 已恢复.")
}
}
//// TODO 动态调整消费速率
//func (h *RawConsumerGroupHandler) SetConsumeRate(interval time.Duration) {
// h.mu.Lock()
// defer h.mu.Unlock()
// rateLimiter.SetLimit(rate.Every(interval))
//}

278
master/data_source/kafka_dataSource.go

@ -0,0 +1,278 @@
package data_source
import (
"fmt"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"github.com/spf13/viper"
"log"
"sync"
)
type KafkaConfig struct {
ClientID int // 暂时无用
Brokers []string
GroupID string
Topic string
Partitions int
}
type TopicConfig struct {
Topic string
Partitions int
}
type KafkaConsumerConfig struct {
RawData *RawDataConfig //`yaml:"data_raw"`
AggData *AggDataConfig // `yaml:"data_agg"`
}
type RawDataConfig struct {
MaxBatchSize int `yaml:"maxBatchSize"`
IotaBufSize int `yaml:"iotaBufSize"`
ProcessBufSize int `yaml:"processBufSize"`
}
type AggDataConfig struct {
MaxBatchSize int `yaml:"maxBatchSize"`
AggBufSize int `yaml:"aggBufSize"`
}
type KafkaDataSource struct {
groupId string
Brokers []string
Topics map[string]TopicConfig
Master_kafkaConsumer_config *KafkaConsumerConfig // 性能配置
RawDataHandlers *sync.Map // 原始数据处理器
AggDataHandlers *sync.Map // 聚集数据处理器
kafkaPaused bool
controlChan chan string // 控制信号通道
}
func NewKafkaDataSource() *KafkaDataSource {
k := &KafkaDataSource{
controlChan: make(chan string),
}
k.loadKafkaConfig()
return k
}
func (s *KafkaDataSource) loadKafkaConfig() {
vp := configLoad.LoadConfig()
groupId := vp.GetString("kafka.groupId")
brokers := vp.GetStringSlice("kafka.Brokers")
log.Println("消费者组 kafka.groupId:", groupId)
s.groupId = groupId
s.Brokers = brokers
s.loadTopics(vp)
s.loadKafkaConsumerConfig(vp)
}
func (s *KafkaDataSource) loadTopics(vp *viper.Viper) {
topics := make(map[string]TopicConfig)
// 定义要加载的主题列表
topicNames := []string{"data_raw", "data_agg"}
for _, topicName := range topicNames {
topic := vp.GetString(fmt.Sprintf("kafka.Topics.%s.topic", topicName))
if topic == "" {
log.Printf("主题 kafka.Topics.%s.topic 配置为空", topicName)
continue
}
partitions := vp.GetInt(fmt.Sprintf("kafka.Topics.%s.partitions", topicName))
if partitions <= 0 {
partitions = 1
}
topics[topicName] = TopicConfig{
Topic: topic,
Partitions: partitions,
}
}
s.Topics = topics
}
func (s *KafkaDataSource) loadKafkaConsumerConfig(vp *viper.Viper) {
// 获取 kafkaConsumer 部分的配置
kafkaConsumerKey := "performance.master.kafkaConsumer"
if !vp.IsSet(kafkaConsumerKey) {
log.Panicf("配置 %s 必须存在", kafkaConsumerKey)
}
// 创建 KafkaConsumerConfig 实例
config := &KafkaConsumerConfig{}
// 解析 data_raw 配置
if vp.IsSet(kafkaConsumerKey + ".data_raw") {
dataRaw := &RawDataConfig{}
if err := vp.UnmarshalKey(kafkaConsumerKey+".data_raw", dataRaw); err != nil {
log.Panicf("解析 data_raw 配置失败: %v\n", err)
} else {
config.RawData = dataRaw
}
}
// 解析 data_agg 配置
if vp.IsSet(kafkaConsumerKey + ".data_agg") {
dataAgg := &AggDataConfig{}
if err := vp.UnmarshalKey(kafkaConsumerKey+".data_agg", dataAgg); err != nil {
log.Panicf("解析 data_agg 配置失败: %v\n", err)
} else {
config.AggData = dataAgg
}
}
s.Master_kafkaConsumer_config = config
}
func (s *KafkaDataSource) AggDataProducer() {
var wg sync.WaitGroup
const topicCfgKey = "data_agg"
topicCfg := s.Topics[topicCfgKey]
if topicCfg.Topic == "" {
log.Printf("Error: 启动 AggData Producer 失败,无 kafka.topics.data_agg 配置。")
return
}
if s.Master_kafkaConsumer_config.AggData == nil {
log.Printf("Error: 启动 AggData Producer 失败,无 performance.master.kafkaConsumer.data_agg 配置。")
return
}
// 启动工作协程
wg.Add(1)
go func(clientID int) {
defer wg.Done()
kafkaHandler := NewAggConsumerGroupHandler(KafkaConfig{
Brokers: s.Brokers,
GroupID: s.groupId,
Topic: topicCfg.Topic,
Partitions: topicCfg.Partitions,
ClientID: clientID,
})
kafkaHandler.ControlChan = s.controlChan
// 装载配置
for partId := 0; partId < topicCfg.Partitions; partId++ {
key := fmt.Sprintf("%s_%d", topicCfg.Topic, partId)
msgHandler, ok := s.getDataHandler(topicCfgKey, key)
if !ok || msgHandler == nil {
log.Panicf("Kafka topic[%s] 未定义data_agg 消息处理者,跳过。\n", key)
continue
}
// 把消息传递给 dataSource/kafka/AggDataHandler.HandleMessage([]string)
kafkaHandler.SetTopicHandler(key, msgHandler.HandleMessage)
}
// 失败消息处理者
dlpKey := "DLP_DATA_AGG"
dataHandler, ok := s.AggDataHandlers.Load(dlpKey)
if !ok {
log.Panicf("Kafka topic[%s] 未定义消息处理者,跳过。\n", dlpKey)
}
msgHandler, _ := dataHandler.(IMessageHandler)
kafkaHandler.SetTopicHandler(dlpKey, msgHandler.HandleMessage)
// 启动消费组
kafkaHandler.ConnectConsumerGroup()
}(1)
wg.Wait()
}
// Producer 将 kafka message -> 各数据模型 -> 各数据通道
func (s *KafkaDataSource) RawDataProducer() {
var wg sync.WaitGroup
const topicCfgKey = "data_raw"
topicCfg := s.Topics[topicCfgKey]
if topicCfg.Topic == "" {
log.Printf("Error: 启动 RawData Producer 失败,无 kafka.topics.data_raw 配置。")
return
}
if s.Master_kafkaConsumer_config.RawData == nil {
log.Printf("Error: 启动 RawData Producer 失败,无 performance.master.kafkaConsumer.data_raw 配置。")
return
}
// 启动工作协程
wg.Add(1)
go func(clientID int) {
defer wg.Done()
kafkaHandler := NewRawConsumerGroupHandler(KafkaConfig{
Brokers: s.Brokers,
GroupID: s.groupId,
Topic: topicCfg.Topic,
Partitions: topicCfg.Partitions,
ClientID: clientID,
})
kafkaHandler.ControlChan = s.controlChan
for partId := 0; partId < topicCfg.Partitions; partId++ {
key := fmt.Sprintf("%s_%d", topicCfg.Topic, partId)
msgHandler, ok := s.getDataHandler(topicCfgKey, key)
if !ok || msgHandler == nil {
log.Panicf("Kafka topic[%s] 未定义消息处理者,跳过。\n", key)
continue
}
// 把消息传递给 dataSource/kafka/RawDataHandler.HandleMessage([]string)
kafkaHandler.SetTopicHandler(key, msgHandler.HandleMessage)
}
// 失败消息处理者
dlpKey := "DLP_DATA_RAW"
dataHandler, ok := s.RawDataHandlers.Load(dlpKey)
if !ok {
log.Panicf("Kafka topic[%s] 未定义消息处理者,跳过。\n", dlpKey)
}
msgHandler, _ := dataHandler.(IMessageHandler)
kafkaHandler.SetTopicHandler(dlpKey, msgHandler.HandleMessage)
//启动消费
kafkaHandler.ConnectConsumerGroup()
}(1)
wg.Wait()
}
// 根据 key 获取 dataHandler
func (s *KafkaDataSource) getDataHandler(topicCfg, key string) (IMessageHandler, bool) {
var dataHandler any
var exists bool
if topicCfg == "data_agg" {
dataHandler, exists = s.AggDataHandlers.Load(key)
} else if topicCfg == "data_raw" {
dataHandler, exists = s.RawDataHandlers.Load(key)
}
if !exists {
return nil, false
}
handler, ok := dataHandler.(IMessageHandler)
if !ok {
return nil, false
}
return handler, true
}
// 发送停止信号
func (s *KafkaDataSource) StopConsumers() {
s.controlChan <- "stop"
}
// 发送恢复信号
func (s *KafkaDataSource) ResumeConsumers() {
s.controlChan <- "resume"
}
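
RawDataHandlers and AggDataHandlers are plain *sync.Map fields keyed by "topic_partition" plus the DLP_ keys, and NewKafkaDataSource does not initialize them, so the caller has to fill them before starting a producer. A minimal wiring sketch follows, assuming the configured data_raw topic is literally named "data_raw" with two partitions; the real values come from the kafka section of the config file, and the import path is assumed from the repository layout.

package main

import (
    "fmt"
    "sync"

    "master/data_source" // 导入路径按仓库布局假设
)

func main() {
    ds := data_source.NewKafkaDataSource() // 从配置文件加载 brokers/topics
    ds.RawDataHandlers = &sync.Map{}
    ds.AggDataHandlers = &sync.Map{}

    // 按 "topic_partition" 注册原始数据处理器(此处假设 topic 名为 data_raw,2 个分区)
    for p := 0; p < 2; p++ {
        key := fmt.Sprintf("data_raw_%d", p)
        ds.RawDataHandlers.Store(key, data_source.NewRawDataHandler(key, "data_raw", p))
    }
    // 死信队列处理器
    ds.RawDataHandlers.Store("DLP_DATA_RAW", data_source.NewRawDataHandler("DLP_DATA_RAW", "DLP_DATA_RAW", 0))

    ds.RawDataProducer() // 阻塞:加入消费者组并持续消费
}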

67
master/data_source/kafka_producer.go

@ -0,0 +1,67 @@
package data_source
import (
"encoding/json"
"fmt"
"github.com/IBM/sarama"
"log"
"time"
)
type KafkaProducer struct {
producer sarama.SyncProducer
brokers []string
}
func NewKafkaProducer(brokers []string) (*KafkaProducer, error) {
// 配置 Kafka 生产者
producerConfig := sarama.NewConfig()
producerConfig.Producer.Return.Successes = true // 返回成功发送的消息
producerConfig.Producer.Return.Errors = true // 返回发送失败的消息
producerConfig.Producer.RequiredAcks = sarama.WaitForAll // 等待所有副本确认
producerConfig.Producer.Timeout = 10 * time.Second // 生产者超时时间
producerConfig.Producer.Retry.Max = 3 // 最大重试次数
producerConfig.Producer.Retry.Backoff = 100 * time.Millisecond // 重试间隔时间
producerConfig.Producer.Compression = sarama.CompressionSnappy // 使用 Snappy 压缩
producerConfig.Producer.MaxMessageBytes = 1024 * 1024 * 4 // 单条消息最大 4MB
producer, err := sarama.NewSyncProducer(brokers, producerConfig)
if err != nil {
return nil, fmt.Errorf("failed to create Kafka producer:%v", err)
}
return &KafkaProducer{
producer: producer,
brokers: brokers,
}, nil
}
// 实现将 messages 发送到 Kafka 的指定 topic
func (kp *KafkaProducer) SendStringArrayMessage(topic, msgKey string, values []string) error {
// 将 value 序列化为 JSON
valueBytes, err := json.Marshal(values)
if err != nil {
return fmt.Errorf("failed to marshal value: %v", err)
}
// 构造 Kafka 消息
msg := &sarama.ProducerMessage{
Topic: topic,
Key: sarama.StringEncoder(msgKey),
Value: sarama.ByteEncoder(valueBytes),
}
// 发送消息
_, _, err = kp.producer.SendMessage(msg)
if err != nil {
return fmt.Errorf("failed to send message to Kafka: %v", err)
}
log.Printf("Message sent successfully: key=%s, len(value)=%v", msgKey, len(values))
return nil
}
// Close 关闭 Kafka 生产者
func (kp *KafkaProducer) Close() error {
return kp.producer.Close()
}
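
A short usage sketch for the producer; the broker address, topic, and key are placeholder values. SendStringArrayMessage serializes the whole batch as one JSON array, which matches how the DLP consumers above unmarshal msg.Value back into []string.

package main

import (
    "log"

    "master/data_source" // 导入路径按仓库布局假设
)

func main() {
    producer, err := data_source.NewKafkaProducer([]string{"127.0.0.1:9092"})
    if err != nil {
        log.Fatalf("创建 Kafka 生产者失败: %v", err)
    }
    defer producer.Close()

    // 将一批失败消息写入死信主题(主题与 key 为示例值)
    msgs := []string{`{"raw":1}`, `{"raw":2}`}
    if err := producer.SendStringArrayMessage("DLP_DATA_RAW", "thing-001", msgs); err != nil {
        log.Printf("发送失败: %v", err)
    }
}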

112
master/node_manager/connection_pool_grpc.go

@ -0,0 +1,112 @@
package node_manager
import (
"context"
"et_rpc/pb"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/health/grpc_health_v1"
"log"
"time"
pool "github.com/jolestar/go-commons-pool"
)
type GRPCPoolObject struct {
Conn *grpc.ClientConn // 保存 gRPC 连接
Client pb.NodeServiceClient // gRPC 客户端
}
type GRPCClientFactory struct {
address string
}
// NewGRPCClientFactory 创建新的 gRPC 连接工厂
func NewGRPCClientFactory(address string) *GRPCClientFactory {
return &GRPCClientFactory{
address: address,
}
}
func (f *GRPCClientFactory) MakeObject(ctx context.Context) (*pool.PooledObject, error) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
// 定义重试策略
serviceConfig := `{
"methodConfig": [{
"name": [{"service": "NodeService", "method": "*"}],
"retryPolicy": {
"maxAttempts": 2,
"initialBackoff": "1s",
"maxBackoff": "10s",
"backoffMultiplier": 2,
"retryableStatusCodes": ["UNAVAILABLE", "DEADLINE_EXCEEDED"]
}
}]
}`
conn, err := grpc.NewClient(
f.address,
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithDefaultServiceConfig(serviceConfig),
)
if err != nil {
return nil, err // 如果3次都失败,返回错误
}
client := pb.NewNodeServiceClient(conn)
return pool.NewPooledObject(
&GRPCPoolObject{
Conn: conn,
Client: client,
},
), nil
}
// 销毁 gRPC 连接
func (f *GRPCClientFactory) DestroyObject(ctx context.Context, object *pool.PooledObject) error {
grpcPoolObj := object.Object.(*GRPCPoolObject)
if grpcPoolObj.Client != nil {
// 关闭连接
grpcPoolObj.Conn.Close() // gRPC 客户端连接关闭
}
return nil
}
// 验证 gRPC 连接的有效性
func (f *GRPCClientFactory) ValidateObject(ctx context.Context, object *pool.PooledObject) bool {
grpcPoolObj := object.Object.(*GRPCPoolObject)
select {
case <-ctx.Done():
return false // 如果上下文已经取消,返回无效
default:
// 继续进行有效性检查
}
healthClient := grpc_health_v1.NewHealthClient(grpcPoolObj.Conn)
resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{
Service: "NodeService",
})
if err != nil || resp.Status != grpc_health_v1.HealthCheckResponse_SERVING {
log.Println("ValidateObject failed:", err)
return false
}
return true
}
// 激活 gRPC 连接
func (f *GRPCClientFactory) ActivateObject(ctx context.Context, object *pool.PooledObject) error {
// 可以在这里发送心跳请求以确保连接有效
return nil
}
// 非激活 gRPC 连接
func (f *GRPCClientFactory) PassivateObject(ctx context.Context, object *pool.PooledObject) error {
// 可以在这里进行连接的重置,例如清除状态或缓存
return nil
}
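
The factory is designed to be handed to go-commons-pool; NewNodeConnection below does exactly that with production-sized settings. The stripped-down sketch here only shows the borrow/validate/return cycle in isolation; the node address and pool size are placeholder values and assume a node serving gRPC health checks at that address, with the import path assumed from the repository layout.

package main

import (
    "context"
    "log"

    pool "github.com/jolestar/go-commons-pool"
    "master/node_manager" // 导入路径按仓库布局假设
)

func main() {
    ctx := context.Background()
    factory := node_manager.NewGRPCClientFactory("127.0.0.1:40000")
    p := pool.NewObjectPoolWithDefaultConfig(ctx, factory)
    p.Config.MaxTotal = 10
    p.Config.TestOnBorrow = true // 借用时做健康检查

    obj, err := p.BorrowObject(ctx)
    if err != nil {
        log.Fatalf("借用连接失败: %v", err)
    }
    grpcObj := obj.(*node_manager.GRPCPoolObject)
    _ = grpcObj.Client // 在此通过 NodeServiceClient 发起调用

    if err := p.ReturnObject(ctx, obj); err != nil {
        log.Printf("归还连接失败: %v", err)
    }
}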

128
master/node_manager/load_balancer.go

@ -0,0 +1,128 @@
package node_manager
import (
"et_rpc"
"fmt"
"sync"
"time"
)
const (
Node_Load_Change_Threshold = 200
Node_Load_Threshold = 200 // 节点数据积压阈值
Node_Refresh_Interval = 120 * time.Second // 节点信息刷新间隔
)
type LoadBalancer struct {
nodes []*NodeConnection
nodeSelector INodeSelector // 节点选择器
mu sync.RWMutex
}
func NewLoadBalancer(selector INodeSelector) *LoadBalancer {
lb := &LoadBalancer{
nodes: make([]*NodeConnection, 0),
nodeSelector: selector,
mu: sync.RWMutex{},
}
// TODO 启动健康度检查
//go lb.HealthCheck()
return lb
}
func (b *LoadBalancer) AddNode(node *NodeConnection) {
b.mu.Lock()
defer b.mu.Unlock()
b.nodes = append(b.nodes, node)
}
func (b *LoadBalancer) RemoveNode(addr string) bool {
b.mu.Lock()
defer b.mu.Unlock()
for i, node := range b.nodes {
if node.Addr == addr {
b.nodes = append(b.nodes[:i], b.nodes[i+1:]...)
return true
}
}
return false
}
func (b *LoadBalancer) SetSelector(selector INodeSelector) {
b.mu.Lock()
defer b.mu.Unlock()
b.nodeSelector = selector
}
// SelectNode 通过节点选择器选择节点
func (b *LoadBalancer) SelectNode() (*NodeConnection, error) {
b.mu.RLock()
defer b.mu.RUnlock()
return b.nodeSelector.Select(b.nodes)
}
// UpdateNode 事件驱动更新节点,isError 立即更新
func (b *LoadBalancer) UpdateNode(nodeArgs *et_rpc.NodeArgs, isError bool) error {
b.mu.Lock()
defer b.mu.Unlock()
// 不健康:status = NodeState_Unhealthy 或者 Load 超阈值
for _, node := range b.nodes {
if node.Addr == nodeArgs.Addr {
isOverThreshold := abs(node.NArgs.Load-nodeArgs.Load) > Node_Load_Change_Threshold // 荷载变化超阈值
isTimeout := time.Since(node.lastUpdate) > Node_Refresh_Interval // 超刷新间隔
if isError || isOverThreshold || isTimeout {
node.NArgs.Load = nodeArgs.Load
node.NArgs.Status = et_rpc.NodeState_Healthy //TODO node.GetHealthStatus(nodeArgs)
node.lastUpdate = time.Now()
}
return nil
}
}
return fmt.Errorf("未注册的节点: %s", nodeArgs.Addr)
}
func (b *LoadBalancer) NodeExists(nodeAddr string) bool {
b.mu.RLock()
defer b.mu.RUnlock()
for _, node := range b.nodes {
if node.Addr == nodeAddr {
return true
}
}
return false
}
func abs(x int) int {
if x < 0 {
return -x
}
return x
}
// 定时健康检查、更新节点状态
//func (b *LoadBalancer) HealthCheck() {
// for {
// b.mu.Lock()
// reply := new(et_rpc.NodeArgs)
// for _, node := range b.nodes {
// result := b.checkNodeHealth(node, reply)
// b.UpdateNode(reply, result)
// }
// b.mu.Unlock()
//
// time.Sleep(5 * time.Minute)
// }
//}
//
//// 健康检查的具体实现
//func (b *LoadBalancer) checkNodeHealth(conn *NodeConnection, reply *et_rpc.NodeArgs) bool {
// // 健康检查,例如发送心跳请求等
// err := conn.Call(context.Background(), et_rpc.RPCService_Node_Ping, &et_rpc.NodeArgs{}, reply)
// return err == nil
//
// // TODO 根据返回信息:节点的CPU、内存、硬盘使用情况、数据积压情况来判断节点的健康情况
//}
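
A short sketch of how the balancer, a selector, and a node connection fit together; RoundRobinSelector is defined in node_selector.go further down, the node address is a placeholder, and NewNodeConnection performs a health check, so the sketch assumes a reachable node at that address.

package main

import (
    "log"

    "et_rpc"
    "master/node_manager" // 导入路径按仓库布局假设
)

func main() {
    lb := node_manager.NewLoadBalancer(&node_manager.RoundRobinSelector{})

    conn, err := node_manager.NewNodeConnection(&et_rpc.NodeArgs{Addr: "127.0.0.1:40000"})
    if err != nil {
        log.Fatalf("建立节点连接失败: %v", err)
    }
    lb.AddNode(conn)

    node, err := lb.SelectNode()
    if err != nil {
        log.Printf("没有可用节点: %v", err)
        return
    }
    log.Printf("选中节点: %s", node.Addr)
}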

186
master/node_manager/node_connection_grpc.go

@ -0,0 +1,186 @@
package node_manager
import (
"context"
"et_rpc"
"et_rpc/pb"
"fmt"
"gitea.anxinyun.cn/container/common_models"
pool "github.com/jolestar/go-commons-pool"
"google.golang.org/grpc/health/grpc_health_v1"
"log"
"sync"
"time"
)
type NodeConnection struct {
Addr string
NArgs *et_rpc.NodeArgs
rpcPool *pool.ObjectPool
lastUpdate time.Time // 节点信息更新时间
ctx context.Context
mu sync.Mutex
}
// NewNodeConnection 创建一个 NodeConnection
// TODO NewNodeConnection从配置文件中获取 pool 参数
func NewNodeConnection(args *et_rpc.NodeArgs) (*NodeConnection, error) {
ctx := context.Background()
factory := NewGRPCClientFactory(args.Addr)
p := pool.NewObjectPoolWithDefaultConfig(ctx, factory)
p.Config.MaxTotal = 400
p.Config.MinIdle = 200
p.Config.TestOnBorrow = true
p.Config.TestOnReturn = false
p.Config.TestWhileIdle = true // 是否在空闲时检查连接有效性
p.Config.MinEvictableIdleTime = 30 * time.Minute //空闲连接最小可驱逐时间
//p.Config.SoftMinEvictableIdleTime = 15 * time.Minute //空闲连接软最小可驱逐时间
nodeConn := &NodeConnection{
ctx: ctx,
Addr: args.Addr,
rpcPool: p,
NArgs: args,
}
// 获取连接进行简单的测试
obj, err := nodeConn.rpcPool.BorrowObject(ctx)
if err != nil {
return nil, fmt.Errorf("建立RPC连接失败:%w", err)
}
defer nodeConn.rpcPool.ReturnObject(ctx, obj)
grpcPoolObj, ok := obj.(*GRPCPoolObject)
if !ok {
log.Fatalf("类型断言失败,obj 不是 *GRPCPoolObject 类型")
}
// 健康检查
healthClient := grpc_health_v1.NewHealthClient(grpcPoolObj.Conn)
resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{
Service: "NodeService",
})
if err != nil || resp.Status != grpc_health_v1.HealthCheckResponse_SERVING {
return nil, fmt.Errorf("健康检查失败: %v, 状态: %v", err, resp.Status)
}
return nodeConn, nil
}
func (n *NodeConnection) GetHealthStatus(args *common_models.NodeArgs) et_rpc.NodeState {
// TODO CPU/Memory/Disk 使用是否超过阈值
//resourcesIsOver := true
return et_rpc.NodeState_Healthy
// 荷载是否超过阈值、访问RPC失败
//if args.Load > Node_Load_Threshold || args.Status == et_rpc.NodeState_Unhealthy {
// return et_rpc.NodeState_Unhealthy
//} else {
// return et_rpc.NodeState_Healthy
//}
}
// 更新节点信息
//func (n *NodeConnection) UpdateNodeArgs(args *et_rpc.NodeArgs, forceUpdate bool) {
// n.mu.Lock()
// defer n.mu.Unlock()
//
// // 检查是否需要更新节点信息
// isOverThreshold := abs(n.NArgs.Load-args.Load) > Node_Load_Change_Threshold // 荷载变化超阈值
// isTimeout := time.Since(n.lastUpdate) > Node_Refresh_Interval // 超刷新间隔
//
// if forceUpdate || isOverThreshold || isTimeout {
// // 更新节点信息
// n.NArgs.Load = args.Load
// n.NArgs.Status = n.GetHealthStatus(args)
// n.lastUpdate = time.Now()
// }
//}
func (n *NodeConnection) CallHandleIotaData(id string, messages []string) error {
// 创建新的上下文并设置超时
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
// 从连接池中借用一个连接
obj1, err := n.rpcPool.BorrowObject(ctx)
if err != nil {
return fmt.Errorf("gRPC[HandleIotaData] 借用对象错误: %w", err)
}
// 使用连接相关处理
rpcPoolObj, ok := obj1.(*GRPCPoolObject)
if !ok {
log.Fatalf("类型断言失败,obj1 不是 *GRPCPoolObject 类型")
}
defer func() {
if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
log.Printf("gRPC[HandleIotaData] 归还对象到连接池失败: %v", err)
}
}()
// 进行 RPC 调用
request := &pb.HandleDataRequest{
Id: id,
Messages: messages,
}
startTime := time.Now()
_, err = rpcPoolObj.Client.HandleIotaData(ctx, request)
duration := time.Since(startTime)
if err != nil {
log.Printf("调用失败。gRPC[HandleIotaData] 错误: %v, 耗时: %v", err, duration)
return fmt.Errorf("调用失败。gRPC[HandleIotaData] 错误: %w", err)
}
//log.Printf("调用成功。gRPC[HandleIotaData] resp=%+v, 耗时: %v", resp, duration)
return nil
}
func (n *NodeConnection) CallHandleAggData(id string, messages []string) error {
// 创建新的上下文并设置超时
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
// 从连接池中借用一个连接
obj1, err := n.rpcPool.BorrowObject(ctx)
if err != nil {
return fmt.Errorf("gRPC[HandleAggData] 借用对象错误: %w", err)
}
// 使用连接相关处理
rpcPoolObj, ok := obj1.(*GRPCPoolObject)
if !ok {
log.Fatalf("类型断言失败,obj1 不是 *GRPCPoolObject 类型")
}
defer func() {
if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
log.Printf("gRPC[HandleAggData] 归还对象到连接池失败: %v", err)
}
}()
// 进行 RPC 调用
request := &pb.HandleDataRequest{
Id: id,
Messages: messages,
}
startTime := time.Now()
_, err = rpcPoolObj.Client.HandleAggData(ctx, request)
duration := time.Since(startTime)
if err != nil {
log.Printf("调用失败。gRPC[HandleAggData] 错误: %v, 耗时: %v", err, duration)
return fmt.Errorf("调用失败。gRPC[HandleAggData] 错误: %w", err)
}
//log.Printf("调用成功。gRPC[HandleAggData] resp=%+v, 耗时: %v", resp, duration)
return nil
}
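
CallHandleIotaData and CallHandleAggData each borrow a pooled connection per call and return it afterwards, so callers only deal with the NodeConnection itself. A minimal caller sketch; the address and payload are placeholder values and assume a node serving NodeService at that address, with the import path assumed from the repository layout.

package main

import (
    "log"

    "et_rpc"
    "master/node_manager" // 导入路径按仓库布局假设
)

func main() {
    nodeConn, err := node_manager.NewNodeConnection(&et_rpc.NodeArgs{Addr: "127.0.0.1:40000"})
    if err != nil {
        log.Fatalf("建立节点连接失败: %v", err)
    }

    // 发送一批原始数据(thingId 与消息内容为示例值)
    if err := nodeConn.CallHandleIotaData("thing-001", []string{`{"raw":1}`}); err != nil {
        log.Printf("调用 HandleIotaData 失败: %v", err)
    }
}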

58
master/node_manager/node_manager.go

@ -0,0 +1,58 @@
package node_manager
import (
"et_rpc"
"log"
)
// NodeManager 和 rpcPool 提供了高效的节点管理和 RPC 连接管理功能,支持高并发和连接复用,提升系统性能和稳定性。
type NodeManager struct {
loadBalancer *LoadBalancer
}
func NewNodeManager(lb *LoadBalancer) *NodeManager {
return &NodeManager{
loadBalancer: lb,
}
}
func (m *NodeManager) AddNode(args *et_rpc.NodeArgs) error {
    nodeConn, err := NewNodeConnection(args)
    if err != nil {
        log.Printf("添加Node节点失败:%s\n", err)
        return err
    }

    m.loadBalancer.AddNode(nodeConn)
    log.Printf("添加Node节点: %s\n", args.Addr)

    log.Printf("master共有 %d 个节点", m.NodesCount())
    for _, node := range m.loadBalancer.nodes {
        log.Printf("master -> Node[%s] 的空闲连接有 %d 个。", node.Addr, node.rpcPool.GetNumIdle())
    }
    return nil
}
func (m *NodeManager) RemoveNode(addr string) bool {
return m.loadBalancer.RemoveNode(addr)
//log.Printf("删除Node节点: %s\n", addr)
}
// UpdateNode 更新节点信息,isError 立即更新
func (m *NodeManager) UpdateNode(nodeArgs *et_rpc.NodeArgs, isError bool) error {
return m.loadBalancer.UpdateNode(nodeArgs, isError)
}
func (m *NodeManager) NodeExists(nodeAddr string) bool {
return m.loadBalancer.NodeExists(nodeAddr)
}
func (m *NodeManager) GetNodeConnection() (*NodeConnection, error) {
return m.loadBalancer.SelectNode()
}
func (m *NodeManager) NodesCount() int {
return len(m.loadBalancer.nodes)
}

56
master/node_manager/node_selector.go

@ -0,0 +1,56 @@
package node_manager
import (
"fmt"
"sync/atomic"
)
type INodeSelector interface {
Select(nodes []*NodeConnection) (*NodeConnection, error)
}
type RoundRobinSelector struct {
index int32
}
func (s *RoundRobinSelector) Select(nodes []*NodeConnection) (*NodeConnection, error) {
if len(nodes) == 0 {
return nil, fmt.Errorf("没有可用的节点")
}
// 原子读取当前索引
currentIndex := atomic.LoadInt32(&s.index)
selectedNode := nodes[currentIndex%int32(len(nodes))]
// TODO 检查节点状态, 暂时先不检查节点健康状态
s.UpdateIndex() // 如果节点健康,更新索引
if selectedNode == nil {
return nil, fmt.Errorf("无此索引的节点。%d", currentIndex)
}
return selectedNode, nil
//if selectedNode.NArgs.Status == et_rpc.NodeState_Healthy {
// s.UpdateIndex() // 如果节点健康,更新索引
// return selectedNode, nil
//}
// 如果当前节点不健康,尝试查找下一个健康节点
//for i := 1; i < len(nodes); i++ { // 从下一个节点开始查找
// nextIndex := (currentIndex + int32(i)) % int32(len(nodes))
// selectedNode = nodes[nextIndex]
// if selectedNode.NArgs.Status == et_rpc.NodeState_Healthy {
// s.UpdateIndex() // 找到健康节点,更新索引
// return selectedNode, nil
// }
//}
//
//// 如果没有健康节点,重置索引并返回错误
//atomic.StoreInt32(&s.index, 0)
//return nil, fmt.Errorf("所有节点都不健康")
}
// 更新索引的单独方法
func (s *RoundRobinSelector) UpdateIndex() {
atomic.AddInt32(&s.index, 1) // 原子增加索引
}

85
node/agg_worker/agg_node.go

@ -1,85 +0,0 @@
package agg_worker
import (
"et_analyze"
"gitea.anxinyun.cn/container/common_models"
"github.com/google/uuid"
"log"
"net/rpc"
"node/et_worker/et_recv"
"os"
"time"
)
type AggNode struct {
recvDataHandler *et_recv.RecvDataHanler
}
func NewAggWorker() *AggNode {
return &AggNode{
recvDataHandler: et_recv.NewRecvDataHanler(),
}
}
// Handler 是 RPC 接口,由 master 远程调用
func (the *AggNode) Handler(aggData common_models.AggData, replay *bool) error {
*replay = true
err := the.ConsumerProcess(&aggData)
if err != nil {
return err
}
return nil
}
// ConsumerProcess 处理阈值判断业务
func (the *AggNode) ConsumerProcess(aggData *common_models.AggData) error {
aggHandler := et_analyze.NewAggThresholdHandler()
aggHandler.ProcessData(aggData)
log.Printf("rpc聚集阈值分析[%d]-time[%s]-[%v]", aggData.SensorId, aggData.Date, aggData.Agg)
return nil
}
// RegisterToMaster 调用 master 发布的RPC服务方法 master.NodeRegister
func (the *AggNode) RegisterToMaster() {
connectCount := 0
for {
connectCount++
if connectCount > 3 {
log.Printf("RegisterToMaster 失败 超过%d次,准备退出", connectCount-1)
time.Sleep(time.Second * 10)
os.Exit(1)
}
masterAddr := os.Getenv("masterAddr")
if masterAddr == "" {
masterAddr = "127.0.0.1:50000"
}
time.Sleep(time.Second * 1)
master, err := rpc.Dial("tcp", masterAddr)
if err != nil {
log.Printf("链接失败-> node[%s]", masterAddr)
continue
}
//todo 获取node自己地址
nodeAddr := "127.0.0.1:40001"
status := `{"health_status":"healthy","load_average":{"1_min":0.75,"5_min":1.2,"15_min":0.9},"availability":"available","last_check_time":"2022-01-01T12:00:00Z"}`
resources := `{"cpu":{"cores":4,"usage":"50%","temperature":"60°C"},"memory":{"total":"8GB","used":"4GB","available":"4GB"},"storage":{"total":"256GB","used":"100GB","available":"156GB"}}`
nodeArgs := &common_models.NodeArgs{
ID: uuid.New().String(),
NodeType: "aggNode",
Status: status,
Resources: resources,
Addr: nodeAddr,
ThingIds: []string{},
}
var result bool
err = master.Call("master.NodeRegister", &nodeArgs, &result)
if err != nil {
log.Printf("node[%s]注册到master[%s]异常:%v", masterAddr, nodeAddr, result)
continue
}
break
}
}

108
node/app/app.go

@ -1,23 +1,9 @@
package app
import (
"encoding/gob"
"et_Info"
"et_analyze"
"et_cache"
"et_calc"
"et_calc/group"
"et_print"
"et_push"
"et_sink"
"fmt"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"gopkg.in/natefinch/lumberjack.v2"
"io"
"log"
"net"
"net/rpc"
"node/stages"
"os"
"time"
)
@ -25,7 +11,7 @@ import (
func init() {
multiWriter := io.MultiWriter(os.Stdout, &lumberjack.Logger{
Filename: "./logs/logInfo.log",
MaxSize: 10, // megabytes
MaxSize: 30, // megabytes
MaxBackups: 20,
MaxAge: 30, //days
//Compress: true,
@ -36,85 +22,25 @@ func init() {
}
func Start() {
// etNode 注册
nodeWorker := NewEtWorker()
// etNode 数据后处理环节
nodeStageManage := stages.NewStageManager()
nodeStageManage.AddSource(nodeWorker.ch)
//add 业务环节
nodeStageManage = addWorkStages(nodeStageManage)
// 启动 Node RPC 服务
nodeManager := NewETNode()
go nodeManager.startRPCServer()
<-nodeManager.grpcServerStarted
//add 测试环节
//nodeStageManage = addTestPrintStages(nodeStageManage)
// 初始化与 master 的连接
nodeManager.connectAndRegisterToMaster()
// 启动 etNode 处理
nodeStageManage.Run()
// 每 60 秒向 master 发送一次心跳
go nodeManager.heartbeat(60 * time.Second)
gob.Register([]interface{}{})
err := rpc.RegisterName("etNode", nodeWorker)
if err != nil {
log.Panicf("注册 etNode rpc 异常")
}
go nodeSerRpcListen()
// 启动退出监听的协程
nodeManager.startMonitorExit()
//后移注册流程,避免node启动异常的无效注册
nodeWorker.RegisterToMaster()
//go func() {
// for g := range chGroupData {
// log.Printf("groupItem: %v", g.Stations[0].Info.Name)
// log.Printf("chGroupData=%p,通道数据:%d/%d", chGroupData, len(chGroupData), cap(chGroupData))
// }
//}()
for {
time.Sleep(time.Hour)
}
}
func nodeSerRpcListen() {
port := configLoad.LoadConfig().GetUint16("node.port")
listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
if err != nil {
log.Panicf("服务启动rpc 异常=%s", err.Error())
}
log.Printf("服务监听=> :%d", port)
for {
conn, err := listener.Accept()
if err != nil {
log.Println("rpc Accept异常")
}
log.Printf("node 建立链接 from master[%s]", conn.RemoteAddr())
go rpc.ServeConn(conn)
}
}
func addWorkStages(nodeStageManage *stages.StageManager) *stages.StageManager {
// raws 数据存储
sinkRawHandler := et_sink.NewSinkRawHandler()
nodeStageManage.AddStages(sinkRawHandler.GetStage())
// 测点信息获取
infoHandler := et_Info.NewInfoHandler()
nodeStageManage.AddStages(infoHandler.GetStage())
// 单测点计算
calcHandler := et_calc.NewCalcHandler()
nodeStageManage.AddStages(calcHandler.GetStage())
// 滑窗过滤
cacheHandler := et_cache.NewCacheHandler()
nodeStageManage.AddStages(cacheHandler.GetStage())
// 测点分组计算
groupCalcHandler := group.NewGroupCalc()
nodeStageManage.AddStages(groupCalcHandler.GetStage())
// Theme 数据存储
sinkThemeHandler := et_sink.NewSinkThemeHandler()
nodeStageManage.AddStages(sinkThemeHandler.GetStage())
// 测点阈值分析
stationAnalyzeHandler := et_analyze.NewThresholdHandler()
nodeStageManage.AddStages(stationAnalyzeHandler.GetStage())
// 数据推送
publishHandler := et_push.NewPushHandler()
nodeStageManage.AddStages(publishHandler.GetStage())
return nodeStageManage
}
func addTestPrintStages(nodeStageManage *stages.StageManager) *stages.StageManager {
printHandler := et_print.NewPrintHandler()
nodeStageManage.AddStages(printHandler.GetStage())
return nodeStageManage
}

445
node/app/et_node.go

@ -1,273 +1,306 @@
package app
import (
"context"
"et_analyze"
"et_rpc"
"et_rpc/pb"
"fmt"
"gitea.anxinyun.cn/container/common_models"
"gitea.anxinyun.cn/container/common_utils"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"google.golang.org/grpc"
"google.golang.org/grpc/health"
"google.golang.org/grpc/health/grpc_health_v1"
"log"
"net/rpc"
"node/et_worker/et_recv"
"net"
"os"
"os/signal"
"strings"
"sync"
"syscall"
"time"
)
type EtNode struct {
nodeInfo *common_models.NodeArgs
master *rpcMaster
ch chan *common_models.ProcessData
recvDataHandler *et_recv.RecvDataHanler
aggAnalyzeHandler *et_analyze.AggThresholdHandler
type ETNode struct {
// node rpc server 相关信息
grpcServer *grpc.Server
nodeServer *NodeServiceServer
grpcServerStarted chan struct{} // 通知主程序RPC已经启动
processChannels []chan []*common_models.ProcessData
groupDataChan chan []*common_models.ProcessData // 分组数据
// node 信息
nodeInfo *et_rpc.NodeArgs
Addr string
// master 信息
masterAddr string
masterConn *MasterConnection
}
type rpcMaster struct {
conn *rpc.Client
addr string
}
func NewETNode() *ETNode {
const processChannelsCount = 1
processBufSize := configLoad.LoadConfig().GetInt("performance.node.processBufSize")
processChannels := make([]chan []*common_models.ProcessData, processChannelsCount)
for i := 0; i < processChannelsCount; i++ {
processChannels[i] = make(chan []*common_models.ProcessData, processBufSize)
}
nodeServer := NewNodeServer(processChannels)
grpcServer := grpc.NewServer()
pb.RegisterNodeServiceServer(grpcServer, nodeServer)
const chSize = 1
// 创建grpc健康检查服务
healthServer := health.NewServer()
grpc_health_v1.RegisterHealthServer(grpcServer, healthServer)
// 设置初始健康状态
healthServer.SetServingStatus("NodeService", grpc_health_v1.HealthCheckResponse_SERVING)
func NewEtWorker() *EtNode {
node := &EtNode{
ch: make(chan *common_models.ProcessData, chSize),
recvDataHandler: et_recv.NewRecvDataHanler(),
aggAnalyzeHandler: et_analyze.NewAggThresholdHandler(),
m := &ETNode{
grpcServer: grpcServer,
nodeServer: nodeServer,
grpcServerStarted: make(chan struct{}),
processChannels: processChannels,
//groupDataChan: make(chan []*common_models.ProcessData, 500),
}
node.exitMonitor()
node.heartMonitor()
return node
// 初始化 Node 信息
m.nodeInfo = m.initNodeInfo()
return m
}
// IotaDataHandler 是 RPC 服务方法,由 master 远程调用
func (the *EtNode) IotaDataHandler(iotaData common_models.IotaData, reply *bool) error {
*reply = true
err := the.ConsumerProcess(&iotaData)
func (n *ETNode) startRPCServer() {
port := configLoad.LoadConfig().GetUint16("node.port")
listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
if err != nil {
*reply = false
log.Panicf("启动 Node RPC 服务失败:%v", err)
}
return err
}
defer listener.Close()
log.Printf("启动 Node RPC 服务成功,服务端口:%d", port)
// 是沉降测试数据
func isSettleData(data map[string]interface{}) bool {
// {"pressure":23.09,"temperature":24.93,"ssagee":16.44}
validKeys := map[string]bool{
"pressure": true,
"temperature": true,
"ssagee": true,
time.Sleep(100 * time.Millisecond)
close(n.grpcServerStarted)
// 启动 gRPC 服务器
if err := n.grpcServer.Serve(listener); err != nil {
log.Panicf("gRPC 服务器服务失败:%v", err)
}
}
if len(data) != 3 {
return false
func (n *ETNode) initNodeInfo() *et_rpc.NodeArgs {
// 获取主机的 IP 地址前缀
ipPrefix := configLoad.LoadConfig().GetString("node.hostIpPrefix")
ip4 := common_utils.ReadIP4WithPrefixFirst(ipPrefix)
// 获取主机名
hostName, err := os.Hostname()
if err != nil {
log.Fatalf("获取主机名失败: %v", err)
}
// 获取配置的端口号
log.Printf("node [%s] ip=%s\n", hostName, ip4)
port := configLoad.LoadConfig().GetUint16("node.port")
if port == 0 {
log.Fatalf("未配置有效的端口号")
}
// 构造 Node 的地址
nodeAddr := fmt.Sprintf("%s:%d", ip4, port)
log.Printf("node 的地址为 %s", nodeAddr)
n.Addr = nodeAddr
for key := range data {
if !validKeys[key] {
return false
}
// 初始化 Node 信息
return &et_rpc.NodeArgs{
ID: hostName + time.Now().Format("_20060102_150405"),
Status: et_rpc.NodeState_Healthy,
ErrCode: et_rpc.RPCReply_Success,
ResourceJson: "",
Addr: nodeAddr,
}
return true
}
// ConsumerProcess 将 IotaData -> ProcessData
func (the *EtNode) ConsumerProcess(iotaData *common_models.IotaData) error {
// 记录方法开始时间
startTime := time.Now()
//TODO #TEST BEGIN 测试静力水准仪 (现在有计算公式的单测点计算有问题,为了能跑通 沉降分组计算 测试)
//if !isSettleData(iotaData.Data.Data) {
// return nil
//}
// #TEST END
func (n *ETNode) connectAndRegisterToMaster() {
// 获取master配置
masterHost := configLoad.LoadConfig().GetString("node.remoteMasterHost")
masterPort := configLoad.LoadConfig().GetUint16("master.port")
if masterHost == "" {
masterHost = "127.0.0.1"
}
if masterPort == 0 {
masterPort = 50000
}
masterAddr := fmt.Sprintf("%s:%d", masterHost, masterPort)
deviceData, err := the.recvDataHandler.OnDataHandler(*iotaData)
// node 建立与 master 的连接
masterConn, err := NewMasterConnection(masterAddr)
if err != nil {
return err
log.Printf("ERROR: 建立与 Master[%s] 的连接失败!!\n", masterAddr)
} else {
n.masterConn = masterConn
log.Printf("建立与 Master[%s] 的连接成功!\n", masterAddr)
}
if deviceData == nil {
return nil
}
n.masterAddr = masterAddr
n.masterConn = masterConn
log.Printf("rpc处理设备数据[%s]-time[%v]-[%v]", deviceData.DeviceId, deviceData.AcqTime, deviceData.Raw)
time.Sleep(500 * time.Millisecond)
the.ch <- &common_models.ProcessData{
DeviceData: *deviceData,
Stations: []common_models.Station{},
// node 向 master 发送注册请求(node 调用 master 的注册服务)
err = n.register() //尝试3次
if err != nil {
// 3次尝试失败后退出程序
log.Fatalf("node[%s]->master[%s] 注册失败,Error: %v", n.nodeInfo.Addr, masterAddr, err)
//log.Println(err)
}
defer func() {
duration := time.Since(startTime)
log.Printf("ConsumerProcess(iotaData *common_models.IotaData)执行时长: %v", duration)
}()
return nil
}
// AggDataHandler 聚集阈值处理者,被 master 远程调用
func (the *EtNode) AggDataHandler(aggData common_models.AggData, reply *bool) error {
*reply = true
err := the.aggAnalyzeHandler.ProcessData(&aggData)
if err != nil {
errmsg := fmt.Sprintf("[etNode.AggDataHandler]变化速率阈值分析%s[aggTypeId:%d]ERROR: %v", aggData.R(), aggData.AggTypeId, err)
log.Println(errmsg)
return err
func (n *ETNode) register() error {
if n.masterConn == nil {
return fmt.Errorf("n.masterConn is nil")
}
log.Printf("[etNode.AggDataHandler]变化速率阈值分析SUCCESS。%s[aggTypeId:%d]changed[%v]", aggData.R(), aggData.AggTypeId, aggData.Changed)
return nil
}
//n.nodeInfo.Load = len(n.chIotaData)
const maxRetries = 3
retries := 0
// 实现源接口
func (the *EtNode) Process(ctx context.Context) (<-chan any, error) {
source := make(chan any, chSize)
go func() {
defer close(source)
for {
select {
case a := <-the.ch:
source <- a
log.Printf("存储数据=>source out,len=%d,%d", len(source), len(the.ch))
case <-ctx.Done():
log.Println("退出[source] EtNode.Process")
return
}
for {
err := n.masterConn.CallRegister(n.nodeInfo)
if err == nil {
log.Println("注册成功")
return nil // 注册成功,返回 nil
} else {
log.Printf("注册失败: %v", err)
retries++
}
if retries >= maxRetries {
log.Println("达到最大重试次数,停止注册尝试")
return fmt.Errorf("注册失败,达到最大重试次数: %v", err) // 返回错误
}
}()
return source, nil
// 每5秒发送一次注册消息
time.Sleep(5 * time.Second)
}
}
func (n *ETNode) unregister() {
err := n.masterConn.CallUnregister()
if err != nil {
log.Printf("node[%s] 从 master 注销异常,err=%v", n.nodeInfo.Addr, err)
} else {
log.Printf("node[%s] 从 master 注销成功", n.nodeInfo.Addr)
}
}
// RegisterToMaster 调用 master 发布的RPC服务方法 master.NodeRegister
func (the *EtNode) RegisterToMaster() {
maxCount := 3
connectCount := 0
func (n *ETNode) heartbeat(interval time.Duration) {
// 心跳3次失败后,发送注册消息
const maxRetries = 3
retries := 0
var err error
for {
connectCount++
if connectCount > maxCount {
log.Printf("RegisterToMaster 失败 超过%d次,准备退出", maxCount)
time.Sleep(time.Second * 10)
os.Exit(1)
}
masterAddr := loadMasterAddr()
masterConn, err := rpc.Dial("tcp", masterAddr)
if err != nil {
log.Printf("链接失败-> node[%s]", masterAddr)
time.Sleep(time.Second * 5)
if n.masterConn == nil {
log.Println("ERROR: masterConn is nil")
time.Sleep(1 * time.Second)
continue
}
the.master = &rpcMaster{
conn: masterConn,
addr: masterAddr,
}
time.Sleep(time.Millisecond * 200)
//获取node自己地址
ipPrefix := configLoad.LoadConfig().GetString("node.hostIpPrefix")
ip4 := common_utils.ReadIP4WithPrefixFirst(ipPrefix)
hostName, err := os.Hostname()
log.Printf("node [%s] ip=%s\n", hostName, ip4)
port := configLoad.LoadConfig().GetUint16("node.port")
callNodeAddr := fmt.Sprintf("%s:%d", ip4, port)
if the.nodeInfo == nil {
the.nodeInfo = &common_models.NodeArgs{
ID: hostName + time.Now().Format("_20060102_150405"),
NodeType: "etNode",
Status: "",
Resources: "",
Addr: callNodeAddr,
ThingIds: []string{},
err = n.masterConn.CallHeartbeatNode(n.nodeInfo.Addr)
if err == nil {
log.Println("心跳成功!!")
retries = 0
} else {
log.Printf("心跳消息发送失败: %v", err)
if strings.Contains(err.Error(), "未注册的节点") {
retries = 3
} else {
retries++
}
}
var result bool
err = the.master.conn.Call("master.NodeRegister", the.nodeInfo, &result)
if err != nil {
log.Printf("node[%s] 注册到 master[%s]异常:%v", the.nodeInfo.Addr, the.master.addr, result)
continue
}
break
}
}
func (the *EtNode) heartToMaster() {
maxCount := 3
connectCount := 0
reRegister := false
for {
connectCount++
if connectCount > maxCount {
log.Printf("heartToMaster 失败 超过%d次", maxCount)
reRegister = true
break
}
var result bool
err := the.master.conn.Call("master.NodeHeart", the.nodeInfo, &result)
if err != nil {
log.Printf("node[%s] 心跳到 master[%s]异常:%v", the.nodeInfo.Addr, the.master.addr, result)
time.Sleep(time.Second * 5)
continue
if retries >= maxRetries {
// 重新发送注册、重置重试计数
err = n.register()
if err == nil {
log.Println("重新注册成功", n.Addr)
} else {
log.Println("重新注册失败", n.Addr)
}
retries = 0
}
break
}
if reRegister { //触发重新注册
log.Printf("node[%s] 心跳失败触发-重新注册到 master[%s]", the.nodeInfo.Addr, the.master.addr)
the.RegisterToMaster()
}
}
func (the *EtNode) UnRegisterToMaster() {
var result bool
if err := the.master.conn.Call("master.NodeUnRegister", the.nodeInfo, &result); err != nil {
log.Printf("node[%s] 从master注销,异常:%v", the.nodeInfo.Addr, err.Error())
} else {
log.Printf("node[%s] 从master注销,结果:%v", the.nodeInfo.Addr, result)
// 按 interval 间隔发送心跳
time.Sleep(interval)
}
}
func (the *EtNode) exitMonitor() {
go func() {
c := make(chan os.Signal, 1)
// 通过signal.Notify函数将信号通道c注册到系统相关的退出信号上
// 这里使用了两个退出信号:syscall.SIGINT(Ctrl+C)和syscall.SIGTERM(系统发送的退出信号)
signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGKILL)
// 阻塞等待接收信号
s := <-c
log.Printf("接收到退出信号:%v,进行清理工作", s)
the.UnRegisterToMaster()
time.Sleep(3 * time.Second)
os.Exit(0)
}()
func (n *ETNode) startMonitorExit() {
c := make(chan os.Signal, 1)
// 通过signal.Notify函数将信号通道c注册到系统相关的退出信号上
// 监听 SIGINT(Ctrl+C)、SIGTERM、SIGHUP、SIGQUIT 等退出信号;SIGKILL 实际无法被捕获
signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGKILL)
// 阻塞等待接收信号
s := <-c
log.Printf("接收到退出信号:%v,进行清理工作", s)
// 注销
n.unregister()
// 等待所有数据处理完毕
n.waitForDataProcessing()
log.Printf("退出前,通道中数据已经处理完毕!!")
os.Exit(0)
}
func (the *EtNode) heartMonitor() {
func (n *ETNode) waitForDataProcessing() {
const waitInterval = 10 * time.Second
var wg sync.WaitGroup
// 处理 IotaData 通道
wg.Add(1)
go func() {
ticker := time.NewTicker(time.Minute)
defer ticker.Stop()
for range ticker.C {
if the.master != nil {
log.Printf("node[%s] 心跳触发-> master[%s]", the.nodeInfo.Addr, the.master.addr)
the.heartToMaster()
defer wg.Done()
for _, ch := range n.processChannels {
for len(ch) > 0 {
time.Sleep(waitInterval)
}
}
}()
// 处理 groupDataChan 通道
wg.Add(1)
go func() {
defer wg.Done()
for len(n.groupDataChan) > 0 {
time.Sleep(waitInterval)
}
}()
// 等待所有 goroutine 完成
wg.Wait()
}
// LoadCh test用
func (the *EtNode) LoadCh() chan *common_models.ProcessData {
return the.ch
}
//func LogProcessDataTimeCost(nodeId, deviceId string, start time.Time) {
// tc := time.Since(start)
// log.Printf("******** [%s][%s]装载设备信息耗时: %v", nodeId, deviceId, tc)
//}
func loadMasterAddr() string {
masterHost := configLoad.LoadConfig().GetString("node.remoteMasterHost")
masterPort := configLoad.LoadConfig().GetUint16("master.port")
if masterHost == "" {
masterHost = "127.0.0.1"
}
if masterPort == 0 {
masterPort = 50000
}
return fmt.Sprintf("%s:%d", masterHost, masterPort)
}
// 是沉降测试数据
//func isSettleData(data map[string]interface{}) bool {
// // {"pressure":23.09,"temperature":24.93,"ssagee":16.44}
// validKeys := map[string]bool{
// "pressure": true,
// "temperature": true,
// "ssagee": true,
// }
//
// if len(data) != 3 {
// return false
// }
//
// for key := range data {
// if !validKeys[key] {
// return false
// }
// }
// return true
//}
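Read together with app.Start above, the intended ETNode lifecycle is roughly the following (a condensed sketch of calls that already appear in this diff, not new behaviour):
n := NewETNode()                 // build gRPC server, NodeServiceServer and process channels
go n.startRPCServer()            // listen on node.port, serve NodeService plus health checks
<-n.grpcServerStarted            // wait until the RPC server is up
n.connectAndRegisterToMaster()   // create the master connection pool and register (3 attempts)
go n.heartbeat(60 * time.Second) // periodic heartbeat; 3 consecutive failures trigger re-registration
n.startMonitorExit()             // block on SIGINT/SIGTERM, unregister and drain channels before exit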

242
node/app/master_connection_grpc.go

@ -0,0 +1,242 @@
package app
import (
"context"
"et_rpc"
"et_rpc/pb"
"fmt"
pool "github.com/jolestar/go-commons-pool"
"google.golang.org/grpc/health/grpc_health_v1"
"log"
"sync"
"time"
)
type MasterConnection struct {
MasterAddr string
Addr string
NArgs *et_rpc.NodeArgs
rpcPool *pool.ObjectPool
lastUpdate time.Time // 节点信息更新时间
ctx context.Context
mu sync.Mutex
factory *MasterGRPCClientFactory
}
// TODO NewMasterConnection 从配置文件中获取 pool 参数
func NewMasterConnection(masterAddr string) (*MasterConnection, error) {
ctx := context.Background()
factory := NewMasterGRPCClientFactory(masterAddr)
p := pool.NewObjectPoolWithDefaultConfig(ctx, factory)
p.Config.MaxTotal = 10 // 最大连接数
p.Config.MaxIdle = 5 // 最大空闲连接数
p.Config.MinIdle = 1 // 最小空闲连接数
p.Config.TestOnBorrow = true
p.Config.TestOnReturn = false
p.Config.TestWhileIdle = true // 是否在空闲时检查连接有效性
p.Config.MinEvictableIdleTime = 30 * time.Minute //空闲连接最小可驱逐时间
//p.Config.SoftMinEvictableIdleTime = 15 * time.Minute //空闲连接软最小可驱逐时间
conn := &MasterConnection{
ctx: ctx,
MasterAddr: masterAddr,
rpcPool: p,
}
// 获取连接进行简单的测试
obj, err := conn.rpcPool.BorrowObject(ctx)
if err != nil {
return nil, fmt.Errorf("建立RPC连接失败:%w", err)
}
defer conn.rpcPool.ReturnObject(ctx, obj)
grpcPoolObj, ok := obj.(*MasterGRPCPoolObject)
if !ok {
log.Fatalf("类型断言失败,obj 不是 *MasterGRPCPoolObject 类型")
}
// 健康检查
healthClient := grpc_health_v1.NewHealthClient(grpcPoolObj.Conn)
resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{
Service: "MasterService",
})
if err != nil {
return nil, fmt.Errorf("健康检查失败: %w", err)
}
if resp.Status != grpc_health_v1.HealthCheckResponse_SERVING {
return nil, fmt.Errorf("健康检查失败,状态: %v", resp.Status)
}
conn.factory = factory
return conn, nil
}
func (n *MasterConnection) BorrowValidConnection(ctx context.Context) (*pool.PooledObject, error) {
var obj1 interface{}
var err error
// 尝试借用连接,最多重试 3 次
for attempts := 0; attempts < 3; attempts++ {
obj1, err = n.rpcPool.BorrowObject(ctx)
if err == nil {
break
}
log.Printf("Attempt %d: Failed to borrow object from pool: %v", attempts+1, err)
time.Sleep(1 * time.Second)
}
if err != nil {
return nil, fmt.Errorf("borrow object error after 3 attempts: %w", err)
}
pooledObject, ok := obj1.(*pool.PooledObject)
if !ok {
return nil, fmt.Errorf("invalid object type from pool") // 类型不匹配,返回错误
}
if !n.factory.ValidateObject(ctx, pooledObject) {
err = n.factory.DestroyObject(ctx, pooledObject)
if err != nil {
return nil, err
}
obj1, err = n.factory.MakeObject(ctx)
if err != nil {
return nil, err
}
pooledObject, ok = obj1.(*pool.PooledObject)
if !ok {
return nil, fmt.Errorf("invalid object type from pool after recreation") // 类型不匹配,返回错误
}
}
return pooledObject, nil
}
func (n *MasterConnection) CallRegister(nodeInfo *et_rpc.NodeArgs) error {
n.NArgs = nodeInfo
// 创建新的上下文并设置超时
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
// 从连接池中借用一个连接
obj1, err := n.rpcPool.BorrowObject(ctx)
if err != nil {
return fmt.Errorf("gRPC[CallRegister] 借用对象错误: %w", err)
}
// 使用连接相关处理
rpcPoolObj := obj1.(*MasterGRPCPoolObject)
defer func() {
if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
log.Printf("gRPC[CallRegister] 归还对象到连接池失败: %v", err)
}
}()
// 进行 RPC 调用
request := &pb.NodeRequest{
Id: fmt.Sprintf("master-%s", n.MasterAddr),
Address: nodeInfo.Addr,
ThingIds: make([]string, 0),
}
resp, err := rpcPoolObj.Client.RegisterNode(ctx, request)
if err != nil {
return fmt.Errorf("调用 gRPC[CallRegister] 错误: %w", err)
}
log.Printf("调用 gRPC[CallRegister] resp=%+v, err=%+v\n", resp, err)
// 归还连接
//err = n.rpcPool.ReturnObject(ctx, obj1)
//if err != nil {
// log.Printf("归还对象到连接池失败: %v", err)
// return fmt.Errorf("归还对象错误: %w", err)
//}
return nil
}
func (n *MasterConnection) CallUnregister() error {
// 创建新的上下文并设置超时
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
// 从连接池中借用一个连接
obj1, err := n.rpcPool.BorrowObject(ctx)
if err != nil {
return fmt.Errorf("gRPC[CallUnregister] 借用对象错误: %w", err)
}
// 使用连接相关处理
rpcPoolObj := obj1.(*MasterGRPCPoolObject)
defer func() {
if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
log.Printf("gRPC[CallUnregister] 归还对象到连接池失败: %v", err)
}
}()
// 进行 RPC 调用
request := &pb.NodeRequest{
Id: "",
Address: n.Addr,
ThingIds: make([]string, 0),
}
resp, err := rpcPoolObj.Client.UnregisterNode(ctx, request)
if err != nil {
return fmt.Errorf("调用 gRPC[CallUnregister] 错误: %w", err)
}
log.Printf("调用 gRPC[CallUnregister] resp=%+v, err=%+v\n", resp, err)
// 归还连接
err = n.rpcPool.ReturnObject(ctx, obj1)
if err != nil {
log.Printf("归还对象到连接池失败: %v", err)
return fmt.Errorf("归还对象错误: %w", err)
}
return nil
}
func (n *MasterConnection) CallHeartbeatNode(nodeAddr string) error {
// 创建新的上下文并设置超时
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
// 从连接池中借用一个连接
obj1, err := n.rpcPool.BorrowObject(ctx)
if err != nil {
return fmt.Errorf("gRPC[CallHeartbeatNode] 借用对象错误: %w", err)
}
// 使用连接相关处理
rpcPoolObj := obj1.(*MasterGRPCPoolObject)
defer func() {
if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
log.Printf("gRPC[CallHeartbeatNode] 归还对象到连接池失败: %v", err)
}
log.Printf("gRPC[CallHeartbeatNode] 已归还对象 obj1 。")
}()
// 进行 RPC 调用
request := &pb.NodeRequest{
Id: "",
Address: nodeAddr,
ThingIds: make([]string, 0),
}
resp, err := rpcPoolObj.Client.HeartbeatNode(ctx, request)
if err != nil {
return fmt.Errorf("调用 gRPC[CallHeartbeatNode] 错误: %w", err)
}
log.Printf("调用 gRPC[CallHeartbeatNode] resp=%+v, err=%+v\n", resp, err)
// 归还连接
//err = n.rpcPool.ReturnObject(ctx, obj1)
//if err != nil {
// log.Printf("归还对象到连接池失败: %v", err)
// return fmt.Errorf("归还对象错误: %w", err)
//}
return nil
}
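The three Call* methods above repeat the same borrow/call/return sequence. One possible refactor is a shared wrapper (a sketch only; withMasterClient is a hypothetical helper, not part of this change):
// Hypothetical helper: borrow a pooled gRPC client, run fn, then return the connection.
func (n *MasterConnection) withMasterClient(timeout time.Duration, fn func(ctx context.Context, c pb.MasterServiceClient) error) error {
    ctx, cancel := context.WithTimeout(context.Background(), timeout)
    defer cancel()
    obj, err := n.rpcPool.BorrowObject(ctx)
    if err != nil {
        return fmt.Errorf("borrow connection: %w", err)
    }
    defer func() {
        if rerr := n.rpcPool.ReturnObject(ctx, obj); rerr != nil {
            log.Printf("return connection: %v", rerr)
        }
    }()
    return fn(ctx, obj.(*MasterGRPCPoolObject).Client)
}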

112
node/app/master_connection_pool_grpc.go

@ -0,0 +1,112 @@
package app
import (
"context"
"et_rpc/pb"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/health/grpc_health_v1"
"log"
"time"
pool "github.com/jolestar/go-commons-pool"
)
type MasterGRPCPoolObject struct {
Conn *grpc.ClientConn // 保存 gRPC 连接
Client pb.MasterServiceClient // gRPC 客户端
}
type MasterGRPCClientFactory struct {
address string
}
// NewGRPCClientFactory 创建新的 gRPC 连接工厂
func NewMasterGRPCClientFactory(address string) *MasterGRPCClientFactory {
return &MasterGRPCClientFactory{
address: address,
}
}
func (f *MasterGRPCClientFactory) MakeObject(ctx context.Context) (*pool.PooledObject, error) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
// 定义重试策略
serviceConfig := `{
"methodConfig": [{
"name": [{"service": "MasterService", "method": "*"}],
"retryPolicy": {
"maxAttempts": 2,
"initialBackoff": "1s",
"maxBackoff": "10s",
"backoffMultiplier": 2,
"retryableStatusCodes": ["UNAVAILABLE", "DEADLINE_EXCEEDED"]
}
}]
}`
conn, err := grpc.NewClient(
f.address,
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithDefaultServiceConfig(serviceConfig),
)
if err != nil {
return nil, err // 创建 gRPC 客户端失败,返回错误
}
client := pb.NewMasterServiceClient(conn)
return pool.NewPooledObject(
&MasterGRPCPoolObject{
Conn: conn,
Client: client,
},
), nil
}
// 销毁 gRPC 连接
func (f *MasterGRPCClientFactory) DestroyObject(ctx context.Context, object *pool.PooledObject) error {
grpcPoolObj := object.Object.(*MasterGRPCPoolObject)
if grpcPoolObj.Client != nil {
// 关闭连接
grpcPoolObj.Conn.Close() // gRPC 客户端连接关闭
}
return nil
}
// 验证 gRPC 连接的有效性
func (f *MasterGRPCClientFactory) ValidateObject(ctx context.Context, object *pool.PooledObject) bool {
grpcPoolObj := object.Object.(*MasterGRPCPoolObject)
select {
case <-ctx.Done():
return false // 如果上下文已经取消,返回无效
default:
// 继续进行有效性检查
}
healthClient := grpc_health_v1.NewHealthClient(grpcPoolObj.Conn)
resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{
Service: "MasterService",
})
if err != nil || resp.Status != grpc_health_v1.HealthCheckResponse_SERVING {
log.Println("ValidateObject failed:", err)
return false
}
return true
}
// 激活 gRPC 连接
func (f *MasterGRPCClientFactory) ActivateObject(ctx context.Context, object *pool.PooledObject) error {
// 可以在这里发送心跳请求以确保连接有效
return nil
}
// 钝化(Passivate)gRPC 连接
func (f *MasterGRPCClientFactory) PassivateObject(ctx context.Context, object *pool.PooledObject) error {
// 可以在这里进行连接的重置,例如清除状态或缓存
return nil
}

68
node/app/node_process_stage.go

@ -0,0 +1,68 @@
package app
import (
"et_Info"
"et_analyze"
"et_cache"
"et_cache/cacheSer"
"et_calc"
"et_calc/group"
"et_push"
"et_sink"
"gitea.anxinyun.cn/container/common_models"
"gitea.anxinyun.cn/container/common_utils"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"gitea.anxinyun.cn/container/common_utils/dbHelper"
"gitea.anxinyun.cn/container/common_utils/storage/storageDBs"
"node/stages"
)
func CreateStages(inChan chan []*common_models.ProcessData, outChan chan []*common_models.ProcessData) *stages.StageManager {
redisAddr := configLoad.LoadConfig().GetString("redis.address")
configHelper := common_utils.NewConfigHelper(redisAddr)
cacheServer := cacheSer.NewCacheServer(configHelper)
esAddresses := configLoad.LoadConfig().GetStringSlice("es.addresses")
esESHelper := dbHelper.NewESHelper(esAddresses, "", "")
storageConsumers := storageDBs.LoadIStorageConsumer()
// etNode 数据后处理环节
nodeStageManage := stages.NewStageManager(outChan)
nodeStageManage.AddSource(inChan)
// raws 数据存储
sinkRawHandler := et_sink.NewSinkRawHandler(storageConsumers)
nodeStageManage.AddStages(sinkRawHandler.GetStage())
// 测点信息获取
infoHandler := et_Info.NewInfoHandler(configHelper)
nodeStageManage.AddStages(infoHandler.GetStage())
// 单测点计算
calcHandler := et_calc.NewCalcHandler(configHelper, cacheServer, esESHelper)
nodeStageManage.AddStages(calcHandler.GetStage())
// 测点数据缓存(滑窗过滤)
cacheHandler := et_cache.NewCacheHandler(cacheServer)
nodeStageManage.AddStages(cacheHandler.GetStage())
// 测点分组计算
groupCalcHandler := group.NewGroupCalc(configHelper)
nodeStageManage.AddStages(groupCalcHandler.GetStage())
// Theme 数据存储
sinkThemeHandler := et_sink.NewSinkThemeHandler(storageConsumers)
nodeStageManage.AddStages(sinkThemeHandler.GetStage())
// 测点阈值分析
stationAnalyzeHandler := et_analyze.NewThresholdHandler()
nodeStageManage.AddStages(stationAnalyzeHandler.GetStage())
//数据推送
pushEnable := configLoad.LoadConfig().GetBool("push.enable")
if pushEnable {
publishHandler := et_push.NewPushHandler()
nodeStageManage.AddStages(publishHandler.GetStage())
}
return nodeStageManage
}
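For orientation, a minimal sketch of driving one pipeline built by CreateStages (buffer sizes and the batch variable are illustrative; in the node this wiring is done by NodeServiceServer.RunStageManager):
in := make(chan []*common_models.ProcessData, 100)
out := make(chan []*common_models.ProcessData, 100) // kept as resultChan for a later sink
mgr := CreateStages(in, out)
go mgr.RunStages() // chains the stages and keeps draining the last stage's output
in <- batch        // batch is a []*common_models.ProcessData produced upstream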

455
node/app/node_server.go

@ -0,0 +1,455 @@
package app
import (
"context"
"encoding/json"
"et_analyze"
"et_rpc/pb"
"fmt"
"gitea.anxinyun.cn/container/common_models"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"golang.org/x/time/rate"
"log"
"math"
"node/et_worker/et_recv"
"strings"
"sync"
"time"
)
type UTCTime time.Time
// UnmarshalJSON 自定义日期解析
func (ut *UTCTime) UnmarshalJSON(data []byte) error {
// 去掉 JSON 字符串的引号
str := string(data)
if len(str) < 2 || str[0] != '"' || str[len(str)-1] != '"' {
return fmt.Errorf("invalid time format: %s", str)
}
str = str[1 : len(str)-1]
// 解析自定义日期格式
t, err := time.Parse("2006-01-02T15:04:05.999-0700", str)
if err != nil {
return fmt.Errorf("failed to parse time: %v", err)
}
// 赋值
*ut = UTCTime(t)
return nil
}
// aggDataMsg: {"date":"2024-09-19T09:39:59.999+0000","sensorId":106,"structId":1,"factorId":11,"aggTypeId":2006,"aggMethodId":3004,"agg":{"strain":-19.399999618530273},"changed":{"strain":-3}}
type AggDataJson struct {
Date UTCTime
SensorId int
StructId int
FactorId int
AggTypeId int // 聚集类型 : 10分钟/30分钟/3小时/6小时/12小时/时/日/周/月聚集
AggMethodId int // 聚集方法 : 平均值/最大值/最小值
Agg map[string]float64 // 聚集数据
Changed map[string]float64 // 变化量
}
// NodeServiceServer 实现了 NodeServiceServer 接口
type NodeServiceServer struct {
pb.UnimplementedNodeServiceServer // 嵌入未实现的接口,以便将来兼容性
Addr string
Load int32
iotaDataHandler *et_recv.RecvDataHandler
aggDataHandler *et_analyze.AggThresholdHandler
iotaChannels []chan []common_models.IotaData
processChannels []chan []*common_models.ProcessData
outProcessChannel chan []*common_models.ProcessData
nextProcessChannel int // 记录下一个要尝试的通道索引
nextChannel int // 记录下一个要尝试的通道索引
mu sync.Mutex // 保护 nextChannel 的并发访问
}
func NewNodeServer(processChannels []chan []*common_models.ProcessData) *NodeServiceServer {
stageResultBufSize := configLoad.LoadConfig().GetInt("performance.node.stageResultBufSize")
iotaBufSize := configLoad.LoadConfig().GetInt("performance.node.iotaBufSize")
s := &NodeServiceServer{
iotaDataHandler: et_recv.NewRecvDataHandler(),
aggDataHandler: et_analyze.NewAggThresholdHandler(),
processChannels: processChannels,
outProcessChannel: make(chan []*common_models.ProcessData, stageResultBufSize),
}
s.iotaChannels = s.NewIotaChannels(len(processChannels), iotaBufSize)
// 启动 DeviceInfo 缓存更新协程,设置为每 10 分钟更新一次
s.iotaDataHandler.RecvConfigHelper.StartUpdateDeviceInfo(10*time.Minute, 30)
// 处理 process data
s.RunStageManager()
time.Sleep(500 * time.Millisecond)
// 将 IotaData 转换为 DeviceData, 转换后数据发送到 s.processChannels
go s.HandleIotaChannels()
return s
}
func (s *NodeServiceServer) NewIotaChannels(count, bufferSize int) []chan []common_models.IotaData {
channels := make([]chan []common_models.IotaData, count)
for i := 0; i < count; i++ {
channels[i] = make(chan []common_models.IotaData, bufferSize)
}
return channels
}
func (s *NodeServiceServer) HandleIotaChannels() {
iotaWorkerCount := configLoad.LoadConfig().GetInt("performance.node.iotaWorkerCount")
for index, ch := range s.iotaChannels {
for w := 0; w < iotaWorkerCount; w++ {
go func(c chan []common_models.IotaData) {
s.HandleIotaChan(c, index)
}(ch)
}
}
}
// func (s *NodeServiceServer) HandleProcessChannels() {
func (s *NodeServiceServer) RunStageManager() {
for _, ch := range s.processChannels {
go func(c chan []*common_models.ProcessData) {
stageMgr := CreateStages(c, s.outProcessChannel)
stageMgr.RunStages()
}(ch)
}
}
func (s *NodeServiceServer) sendToIotaChannels(data []common_models.IotaData) (chan []common_models.IotaData, bool) {
startTime := time.Now()
defer func() {
elapsedTime := time.Since(startTime)
log.Printf("sendToIotaChannels elapsedTime= %s\n", elapsedTime)
//log.Printf("Final iotaData channel states: ")
//for _, ch := range s.iotaChannels {
// log.Printf("iotaChan[%p]: %d/%d\n", ch, len(ch), cap(ch))
//}
}()
var selectedChannel chan []common_models.IotaData
minLength := math.MaxInt32
// 选择最空闲的通道
for _, ch := range s.iotaChannels {
if len(ch) < minLength {
minLength = len(ch)
selectedChannel = ch
}
}
// 尝试发送数据
select {
case selectedChannel <- data:
return selectedChannel, true
case <-time.After(100 * time.Millisecond): // 设置超时时间
log.Println("Timeout while trying to send iotaData.")
return nil, false
}
}
// 将 IotaData 转换为 DeviceData, 转换后数据发送到 s.processChannels
func (s *NodeServiceServer) HandleIotaChan(ch chan []common_models.IotaData, index int) {
// 创建一个速率限制器,每秒允许处理 10 批数据
limiter := rate.NewLimiter(10, 1) // 10 次/秒,突发容量为 1
for data := range ch {
// 等待直到可以处理下一条数据
if err := limiter.Wait(context.Background()); err != nil {
log.Printf("处理速率限制错误: %v", err)
continue
}
go func(iotaDataArray []common_models.IotaData) {
dataHandleTime := time.Now() // 记录 data 的处理开始时间
formattedTime := dataHandleTime.Format("2006-01-02 15:04:05.999999999")
defer func() {
if r := recover(); r != nil {
log.Printf("Recovered from panic: %v", r)
}
log.Printf("4.iotaDataArray[%v] 处理耗时:%v", formattedTime, time.Since(dataHandleTime))
}()
log.Printf("1.iotaDataArray[%v] 准备处理。processChannel[%p]数据量:%d/%d", formattedTime, ch, len(ch), cap(ch))
processDataArray := make([]*common_models.ProcessData, 0, len(iotaDataArray))
for _, r := range iotaDataArray {
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
iotaHandleTime := time.Now() // 记录 iota 数据处理开始时间
deviceData, err := s.iotaDataHandler.OnDataHandler(ctx, r)
iotaHandleElapse := time.Since(iotaHandleTime) // 计算数据处理耗时
if err != nil {
log.Printf("IotaData->DeviceData[%s] 转换错误:%s. 耗时:%v。", r.DeviceId, err.Error(), iotaHandleElapse)
return
}
if deviceData == nil {
log.Printf("IotaData->DeviceData[%s] 转换错误:deviceData is nil. 耗时:%v。", r.DeviceId, iotaHandleElapse)
return
}
// 将数据写入 processDataChannel
pd := &common_models.ProcessData{
DeviceData: *deviceData,
Stations: []common_models.Station{},
}
processDataArray = append(processDataArray, pd)
}
log.Printf("2.iotaDataArray[%v] 已处理完 IotaData->DeviceData ", formattedTime)
sendTime := time.Now()
processChannel, ok := s.sendToProcessChannels(processDataArray, index) // 这里会一直等到有资源
if !ok {
log.Printf("3.iotaDataArray[%v] s.processChannels %d个通道都已满,被阻塞。", formattedTime, len(s.processChannels))
} else {
log.Printf("3.iotaDataArray[%v] 已发送至s.processChannels。processChannel[%p]数据量:%d/%d, \n发送耗时:%v ,iotaDataArray处理耗时:%v",
formattedTime, processChannel, len(processChannel), cap(processChannel), time.Since(sendTime), time.Since(dataHandleTime))
}
}(data)
}
}
func (s *NodeServiceServer) sendToProcessChannels(data []*common_models.ProcessData, index int) (chan []*common_models.ProcessData, bool) {
startTime := time.Now()
//timeoutDuration := configLoad.LoadConfig().GetUint16("performance.node.processTimeout") // 设置超时时间为 60 秒
defer func() {
elapsedTime := time.Since(startTime)
log.Printf("[sendToProcessChannels] elapsedTime= %s\n", elapsedTime)
//log.Printf("[sendToProcessChannels] Final processData channel states:")
//for _, ch := range s.processChannels {
// log.Printf("[sendToProcessChannels] processChan[%p]: %d/%d\n", ch, len(ch), cap(ch))
//}
}()
selectedChannel := s.processChannels[index]
log.Printf("[sendToProcessChannels] 尝试发送。 channel[%p]: %d/%d\n", selectedChannel, len(selectedChannel), cap(selectedChannel))
// 超时限制
//timeout := time.After(time.Duration(timeoutDuration))
for {
select {
case selectedChannel <- data:
// 发送成功
log.Printf("[sendToProcessChannels] 发送成功。channel[%p]: %d/%d\n", selectedChannel, len(selectedChannel), cap(selectedChannel))
time.Sleep(50 * time.Millisecond)
return selectedChannel, true
default:
//log.Printf("[sendToProcessChannels] channel[%p] 已满 cap=%d,继续尝试发送。\n", selectedChannel, cap(selectedChannel))
time.Sleep(200 * time.Millisecond) // 等待一段时间后再尝试
//case <-timeout:
// log.Printf("[sendToProcessChannels] 发送超时超过1分钟,将停止尝试。\n")
// return nil, false // 超时返回 nil 和 false
//case <-time.After(500 * time.Millisecond):
// log.Printf("[sendToProcessChannels] 发送超时500ms,将继续尝试channel[%p]。\n", selectedChannel)
// continue
}
}
}
var limiter = rate.NewLimiter(rate.Limit(500), 1) // 每秒最多处理 500 次请求,突发容量为 1
func (s *NodeServiceServer) HandleIotaData(ctx context.Context, req *pb.HandleDataRequest) (*pb.HandleDataResponse, error) {
if err := limiter.Wait(ctx); err != nil {
return nil, fmt.Errorf("请求速率过高,请稍后重试")
}
startTime := time.Now()
// 1. 数据转换
conversionStart := time.Now()
iotaDataList := s.convertMessages2IotaData(req.Messages)
conversionDuration := time.Since(conversionStart)
log.Printf("[INFO][HandleIotaData] 1.数据转换耗时: %v, 共 %d 条数据。", conversionDuration, len(req.Messages))
// 2. 发送到 Iota 通道
sendStart := time.Now()
_, ok := s.sendToIotaChannels(iotaDataList)
sendDuration := time.Since(sendStart)
log.Printf("[INFO] [HandleIotaData] 2.sendToIotaChannels耗时: %v。", sendDuration)
//log.Printf("[INFO] [HandleIotaData] 2.sendToIotaChannels耗时: %v。通道状态: %v, 通道指针: %p, 当前长度: %d/%d",
// sendDuration, ok, ch, len(ch), cap(ch))
if !ok {
log.Printf("[WARN] [HandleIotaData] 2.所有 Iota 通道已满,无法发送数据,通道数量: %d", len(s.iotaChannels))
return &pb.HandleDataResponse{
Addr: s.Addr,
Load: s.Load,
Status: pb.HandleDataResponse_SUCCESS,
ErrorMessage: "s.iotaChannels 通道已满",
}, nil
}
// 3. 计算总处理时长
totalDuration := time.Since(startTime)
log.Printf("[INFO] [HandleIotaData] 3.总处理时长: %v", totalDuration)
return &pb.HandleDataResponse{
Addr: s.Addr,
Load: s.Load,
Status: pb.HandleDataResponse_SUCCESS,
ErrorMessage: "",
}, nil
}
// HandleAggData 处理聚集数据并返回节点响应
func (s *NodeServiceServer) HandleAggData(ctx context.Context, req *pb.HandleDataRequest) (*pb.HandleDataResponse, error) {
if err := limiter.Wait(ctx); err != nil {
return nil, fmt.Errorf("请求速率过高,请稍后重试")
}
startTime := time.Now()
// 1. 数据转换
conversionStart := time.Now()
aggDataList := s.convertMessages2AggData(req.Messages)
conversionDuration := time.Since(conversionStart)
log.Printf("[INFO][HandleAggData] 1.数据转换耗时: %v, 共 %d 条数据。", conversionDuration, len(req.Messages))
// 2. 变化速率阈值分析
analyzeStart := time.Now()
for _, aggData := range aggDataList {
err := s.aggDataHandler.ProcessData(&aggData)
if err != nil {
errmsg := fmt.Sprintf("[etNode.AggDataHandler] 2.变化速率阈值分析%s[aggTypeId:%d]ERROR: %v", aggData.R(), aggData.AggTypeId, err)
log.Println(errmsg)
}
//} else {
// log.Printf("[etNode.AggDataHandler]变化速率阈值分析SUCCESS。%s[aggTypeId:%d]changed[%v]", aggData.R(), aggData.AggTypeId, aggData.Changed)
//}
}
analyzeDuration := time.Since(analyzeStart)
log.Printf("[INFO][HandleAggData] 2. 变化速率阈值分析耗时: %v。", analyzeDuration)
// 3. 计算总处理时长
totalDuration := time.Since(startTime)
log.Printf("[INFO][HandleAggData] 3.总处理时长: %v", totalDuration)
// 返回响应
return &pb.HandleDataResponse{
Addr: s.Addr,
Load: s.Load,
Status: pb.HandleDataResponse_SUCCESS,
ErrorMessage: "",
}, nil
}
// mustEmbedUnimplementedNodeServiceServer 确保实现了接口
func (s *NodeServiceServer) mustEmbedUnimplementedNodeServiceServer() {}
// createErrorResponse 用于创建错误响应
func (s *NodeServiceServer) createErrorResponse(status pb.HandleDataResponse_Status, message string) (*pb.HandleDataResponse, error) {
response := &pb.HandleDataResponse{
Addr: s.Addr,
Load: s.Load,
Status: status,
ErrorMessage: message,
}
log.Print(message) // 记录错误信息
return response, fmt.Errorf("%s", message)
}
func (s *NodeServiceServer) convertMessages2IotaData(messages []string) []common_models.IotaData {
st := time.Now()
dataArray := make([]common_models.IotaData, 0, len(messages))
// 尝试批量解析
jsonArray := fmt.Sprintf("[%s]", strings.Join(messages, ","))
if err := json.Unmarshal([]byte(jsonArray), &dataArray); err != nil {
// 批量解析失败,逐个解析
for _, msg := range messages {
var data common_models.IotaData
if err := json.Unmarshal([]byte(msg), &data); err != nil {
log.Printf("逐个 JSON 反序列化失败:%v", err)
continue
}
dataArray = append(dataArray, data)
}
}
log.Printf("[convertMessages2IotaData] 序列化耗时:%v ,共解析出 %d 个 IotaData。", time.Since(st), len(dataArray))
return dataArray
}
func (s *NodeServiceServer) convertMessages2AggData(messages []string) []common_models.AggData {
//log.Printf("[convertMessages2AggData] len(messages)=%d ,start ...", len(messages))
st := time.Now()
// 预分配 aggDatas 的容量
aggDatas := make([]common_models.AggData, 0, len(messages))
// 尝试批量解析 JSON 数组
jsonArray := fmt.Sprintf("[%s]", strings.Join(messages, ","))
var tmpDatas []AggDataJson
err := json.Unmarshal([]byte(jsonArray), &tmpDatas)
if err != nil {
log.Printf("JSON 数组反序列化失败,尝试逐个解析:%v", err)
// 如果批量解析失败,逐个解析 JSON 字符串
var wg sync.WaitGroup
var mu sync.Mutex
for _, val := range messages {
wg.Add(1)
go func(msg string) {
defer wg.Done()
var data AggDataJson
if err := json.Unmarshal([]byte(msg), &data); err != nil {
log.Printf("逐个 JSON 反序列化失败:%v", err)
return
}
// 加锁保护 aggDatas
mu.Lock()
aggDatas = append(aggDatas, common_models.AggData{
Date: time.Time(data.Date),
SensorId: data.SensorId,
StructId: data.StructId,
FactorId: data.FactorId,
AggTypeId: data.AggTypeId,
AggMethodId: data.AggMethodId,
Agg: data.Agg,
Changed: data.Changed,
ThingId: "",
})
mu.Unlock()
}(val)
}
// 等待所有 Goroutine 完成
wg.Wait()
} else {
// 批量解析成功,直接转换
aggDatas = make([]common_models.AggData, len(tmpDatas))
for i, data := range tmpDatas {
aggDatas[i] = common_models.AggData{
Date: time.Time(data.Date),
SensorId: data.SensorId,
StructId: data.StructId,
FactorId: data.FactorId,
AggTypeId: data.AggTypeId,
AggMethodId: data.AggMethodId,
Agg: data.Agg,
Changed: data.Changed,
ThingId: "",
}
}
}
log.Printf("[convertMessages2AggData] 序列化耗时:%v ,共解析出 %d 个 AggData。", time.Since(st), len(aggDatas))
return aggDatas
}
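Note on the limiters used above: rate.NewLimiter(r, b) from golang.org/x/time/rate allows r events per second with burst b, and Wait blocks until a token is available or the context is cancelled. A minimal illustration:
lim := rate.NewLimiter(rate.Limit(500), 1) // 500 tokens per second, burst of 1
if err := lim.Wait(ctx); err != nil {      // returns an error if ctx is cancelled or times out
    return nil, fmt.Errorf("请求速率过高,请稍后重试")
}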

75
node/et_worker/et_recv/recvDataHanler.go

@ -1,6 +1,7 @@
package et_recv
import (
"context"
"encoding/json"
"errors"
"fmt"
@ -13,6 +14,19 @@ import (
"time"
)
type RecvDataHandler struct {
RecvConfigHelper *common_utils.ConfigHelper
alarmCacheUtil *common_utils.AlarmCacheUtil
}
func NewRecvDataHandler() *RecvDataHandler {
redisAddr := configLoad.LoadConfig().GetString("redis.address")
return &RecvDataHandler{
RecvConfigHelper: common_utils.NewConfigHelper(redisAddr),
alarmCacheUtil: common_utils.NewAlarmCacheUtil(),
}
}
func Recover(deviceId string, structId int, alarmType string, time time.Time) {
alarm := common_models.AlarmMsg{
MessageMode: common_models.Alarm_Mode_AutoElimination,
@ -74,37 +88,36 @@ func AlarmDtuToOut(device *common_models.DeviceInfo, alarmType, alarmCode, conte
go kafkaHelper.Send2Topic(brokers, "native_alarm", string(jsonOut))
}
type RecvDataHanler struct {
configHelper *common_utils.ConfigHelper
alarmCacheUtil *common_utils.AlarmCacheUtil
}
func NewRecvDataHanler() *RecvDataHanler {
redisAddr := configLoad.LoadConfig().GetString("redis.address")
return &RecvDataHanler{
configHelper: common_utils.NewConfigHelper(redisAddr),
alarmCacheUtil: common_utils.NewAlarmCacheUtil(),
// OnDataHandler iota 数据处理
func (the *RecvDataHandler) OnDataHandler(ctx context.Context, iotaData common_models.IotaData) (*common_models.DeviceData, error) {
if iotaData.DeviceId == "" {
return nil, fmt.Errorf("DeviceId is null")
}
}
// OnDataHandler iota 数据处理
func (the *RecvDataHanler) OnDataHandler(iotaData common_models.IotaData) (*common_models.DeviceData, error) {
configHelper := the.configHelper //common_utils.NewConfigHelper(common_utils.NewRedisHelper("", "192.168.31.128:30379"))
//startTime := time.Now() // 记录开始时间
deviceInfo, err := configHelper.GetDeviceInfo(iotaData.DeviceId)
// 使用带有超时的上下文来获取设备信息
deviceInfo, err := the.RecvConfigHelper.GetDeviceInfo(iotaData.DeviceId, false)
if err != nil {
return nil, err
}
// 检查上下文是否被取消
select {
case <-ctx.Done():
return nil, ctx.Err() // 返回上下文的错误
default:
// 继续处理
}
if deviceInfo == nil {
errMsg := fmt.Sprintf("[%s] DeviceId not found in redis ", iotaData.DeviceId)
errMsg := fmt.Sprintf("[%s] not found in redis ", iotaData.DeviceId)
log.Printf(errMsg)
return nil, errors.New(errMsg)
}
//code := iotaData.ThemeData.Result.Code
//taskId := iotaData.ReadTaskId()
// 处理数据
if iotaData.Data.Success() {
//数据恢复设备高告警
if len(iotaData.Data.Data) == 0 {
log.Printf("[%s] empty data received", iotaData.DeviceId)
}
@ -124,13 +137,13 @@ func (the *RecvDataHanler) OnDataHandler(iotaData common_models.IotaData) (*comm
}
}
} else {
var leafNodes = configHelper.GetSubDeviceNext(iotaData.DeviceId, iotaData.ThingId)
var leafNodes = the.RecvConfigHelper.GetSubDeviceNext(iotaData.DeviceId, iotaData.ThingId)
if len(leafNodes) > 0 {
//todo
}
//Key_alarm_code
alarmTypeOpt, err := configHelper.GetAlarmCode(strconv.Itoa(iotaData.Data.Result.Code))
alarmTypeOpt, err := the.RecvConfigHelper.GetAlarmCode(strconv.Itoa(iotaData.Data.Result.Code))
if err == nil {
the.alarmCacheUtil.Add(the.alarmCacheUtil.ALARM_SOURCE_DEVICE, alarmTypeOpt.TypeCode)
AlarmToOut(deviceInfo.Id, deviceInfo.Structure.Id, alarmTypeOpt.TypeCode, iotaData.TriggerTime, leafNodes)
@ -139,7 +152,6 @@ func (the *RecvDataHanler) OnDataHandler(iotaData common_models.IotaData) (*comm
if alarmTypeOpt.TypeCode == common_models.Alarm_Type_OutRange {
iotaData.Data.Result.Code = 0
}
}
if iotaData.Data.Result.Code == 0 {
@ -163,17 +175,26 @@ func (the *RecvDataHanler) OnDataHandler(iotaData common_models.IotaData) (*comm
DimensionId: iotaData.DimensionId,
DataType: dataType,
}
return data, err
// 记录耗时
//elapsedTime := time.Since(startTime)
//log.Printf("[iotaData -> deviceData] deviceID[%s] 转换耗时: %v", iotaData.DeviceId, elapsedTime)
return data, nil
}
return nil, err
// 记录耗时
//elapsedTime := time.Since(startTime)
//log.Printf("OnDataHandler 耗时: %v", elapsedTime)
return nil, nil
}
// OnAlarmHandler iota 告警处理
func (the *RecvDataHanler) OnAlarmHandler(iotaAlarm common_models.IotaAlarm) {
configHelper := the.configHelper
func (the *RecvDataHandler) OnAlarmHandler(iotaAlarm common_models.IotaAlarm) {
configHelper := the.RecvConfigHelper
deviceId := iotaAlarm.Labels.DeviceId
deviceInfo, err := configHelper.GetDeviceInfo(deviceId)
deviceInfo, err := configHelper.GetDeviceInfo(deviceId, false)
if err != nil {
return
} else {
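As used by node_server.go above, OnDataHandler now takes a caller-supplied context; a typical call site (sketch, with handler being a *RecvDataHandler) bounds the conversion with a timeout:
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
deviceData, err := handler.OnDataHandler(ctx, iotaData)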

43
node/stages/stage.go

@ -1,7 +1,9 @@
package stages
import (
"fmt"
"gitea.anxinyun.cn/container/common_models"
"gitea.anxinyun.cn/container/common_utils/configLoad"
"log"
"time"
)
@ -9,31 +11,35 @@ import (
// 阶段处理
type Stage struct {
Name string
In <-chan *common_models.ProcessData
processFuncs []func(*common_models.ProcessData) *common_models.ProcessData
Out chan *common_models.ProcessData
In <-chan []*common_models.ProcessData
processFuncs []func([]*common_models.ProcessData) []*common_models.ProcessData
Out chan []*common_models.ProcessData
execOver chan bool //阶段执行完毕,用以排查超时
}
func NewStage(name string) *Stage {
stageBufSize := configLoad.LoadConfig().GetInt64("performance.node.stageBufSize")
return &Stage{
Name: name,
processFuncs: make([]func(*common_models.ProcessData) *common_models.ProcessData, 0),
In: make(<-chan *common_models.ProcessData, 1),
Out: make(chan *common_models.ProcessData, 1),
processFuncs: make([]func([]*common_models.ProcessData) []*common_models.ProcessData, 0),
In: make(<-chan []*common_models.ProcessData, stageBufSize),
Out: make(chan []*common_models.ProcessData, stageBufSize),
execOver: make(chan bool, 1),
}
}
func (s *Stage) StageRun() <-chan *common_models.ProcessData {
func (s *Stage) StageRun() <-chan []*common_models.ProcessData {
go func() {
defer func() {
close(s.Out)
log.Printf("[%s]关闭out", s.Name)
}()
for n := range s.In {
//log.Printf("[%s]处理数据 %v", s.Name, n.DeviceData.Name)
s.Out <- s.process(n)
log.Printf("[%s]接收数据 In[%p] 通道长度=%d/%d", s.Name, s.In, len(s.In), cap(s.In))
result := s.process(n)
s.Out <- result
}
log.Printf("%s over", s.Name)
}()
@ -41,27 +47,30 @@ func (s *Stage) StageRun() <-chan *common_models.ProcessData {
return s.Out
}
// AddProcess 添加处理者。处理函数定义 func(*ProcessData) *ProcessData
func (s *Stage) AddProcess(fun func(*common_models.ProcessData) *common_models.ProcessData) {
func (s *Stage) AddProcess(fun func([]*common_models.ProcessData) []*common_models.ProcessData) {
s.processFuncs = append(s.processFuncs, fun)
}
func (s *Stage) process(data *common_models.ProcessData) *common_models.ProcessData {
go s.handlerTimeOutCheck(s.Name, data.DeviceData.DeviceId)
func (s *Stage) process(data []*common_models.ProcessData) []*common_models.ProcessData {
go s.handlerTimeOutCheck(s.Name, "批量处理耗时跟踪")
for _, processFunc := range s.processFuncs {
//tag := fmt.Sprintf("%d/%d", i+1, len(s.processFuncs))
//log.Printf("stage[%s]流程处理 start=> %s", s.Name, data.DeviceData.Name)
log.Printf("stage[%s] start, len(data)=%d", s.Name, len(data))
func() {
defer timeCost(s.Name, data.DeviceData.DeviceId, time.Now())
defer timeCost(s.Name, fmt.Sprintf("ProcessData数组元素个数[%d]", len(data)), time.Now())
data = processFunc(data)
}()
//log.Printf("stage[%s]流程处理 over=> %s", s.Name, data.DeviceData.Name)
log.Printf("stage[%s] over, len(data)=%d", s.Name, len(data))
}
s.execOver <- true
return data
}
func (s *Stage) handlerTimeOutCheck(stageName, deviceId string) {
defaultTimeout := 240 * time.Second // 4分钟
stageTimeout := configLoad.LoadConfig().GetInt64("performance.node.stageTimeout")
defaultTimeout := time.Duration(stageTimeout) * time.Second
select {
case <-s.execOver:
case <-time.After(defaultTimeout):
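With the signature change above, a stage process function now receives and returns a whole batch. A minimal example from a consuming package (illustrative):
stage := stages.NewStage("示例环节")
stage.AddProcess(func(batch []*common_models.ProcessData) []*common_models.ProcessData {
    for _, pd := range batch {
        log.Printf("processing device %s", pd.DeviceData.DeviceId)
    }
    return batch
})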

49
node/stages/stageManage.go

@ -3,46 +3,73 @@ package stages
import (
"gitea.anxinyun.cn/container/common_models"
"log"
"sync"
)
type StageManager struct {
in <-chan *common_models.ProcessData
Stages []Stage
out <-chan *common_models.ProcessData
in chan []*common_models.ProcessData
Stages []Stage
out chan []*common_models.ProcessData
resultChan chan []*common_models.ProcessData // 下一个处理环节的输入通道
}
func NewStageManager() *StageManager {
return &StageManager{}
func NewStageManager(resultChan chan []*common_models.ProcessData) *StageManager {
return &StageManager{
resultChan: resultChan,
}
}
func (the *StageManager) Run() {
func (the *StageManager) RunStages() {
if len(the.Stages) == 0 {
log.Panicf("Stages.len=%d 无有效处理流程", len(the.Stages))
}
// 连接阶段
for i := 0; i < len(the.Stages); i++ {
if i == 0 {
the.Stages[i].In = the.in
} else {
the.Stages[i].In = the.Stages[i-1].Out
}
the.Stages[i].StageRun()
the.Stages[i].StageRun() // 将启动环节协程
}
the.out = the.Stages[len(the.Stages)-1].Out
//todo 替换为sink
log.Printf("********* stageManage.go the.out=%p", the.out)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for {
<-the.out
select {
case _, ok := <-the.out:
if !ok {
log.Printf("StageManager: out channel closed")
return
}
}
}
}()
wg.Wait()
}
func (the *StageManager) AddSource(source <-chan *common_models.ProcessData) {
func (the *StageManager) AddSource(source chan []*common_models.ProcessData) {
log.Printf("stageManager add source=%p 通道数据:%d/%d", source, len(source), cap(source))
the.in = source
}
func (the *StageManager) AddOut(source chan *common_models.ProcessData) {
func (the *StageManager) GetOut() chan []*common_models.ProcessData {
return the.out
}
func (the *StageManager) GetIn() chan []*common_models.ProcessData {
return the.in
}
func (the *StageManager) AddOut(source chan []*common_models.ProcessData) {
the.out = source
}

80
node/stages/stage_test.go

@ -1,55 +1,53 @@
package stages
import (
"fmt"
"gitea.anxinyun.cn/container/common_models"
"log"
"testing"
"time"
)
func TestStageProcess(t *testing.T) {
log.SetFlags(log.Lshortfile | log.Lmicroseconds)
stage1 := NewStage("打印DeviceId")
stage1.AddProcess(printDeviceId)
stage2 := NewStage("打印DeviceName")
stage2.AddProcess(printDeviceName)
sm := NewStageManager()
sm.AddStages(*stage1, *stage2)
source := make(chan *common_models.ProcessData, 1)
sm.AddSource(source)
sm.Run()
i := 0
go func() {
for {
i++
time.Sleep(time.Second * 2)
deviceId := fmt.Sprintf("%d", i)
pd := &common_models.ProcessData{
DeviceData: common_models.DeviceData{
DeviceId: deviceId,
Name: "Name-" + deviceId,
ThingId: "",
StructId: 0,
TaskId: "",
AcqTime: time.Time{},
RealTime: time.Time{},
ErrCode: 0,
Raw: nil,
DeviceInfo: common_models.DeviceInfo{},
DimensionId: "",
},
Stations: []common_models.Station{},
}
source <- pd
log.Printf("进入数据 %d", i)
}
}()
for {
sinkData := <-sm.out
log.Printf("最终数据 %s", sinkData.DeviceData.Name)
}
//stage1 := NewStage("打印DeviceId")
//stage1.AddProcess(printDeviceId)
//stage2 := NewStage("打印DeviceName")
//stage2.AddProcess(printDeviceName)
//sm := NewStageManager()
//sm.AddStages(*stage1, *stage2)
//source := make(chan *common_models.ProcessData, 1)
//sm.AddSource(source)
//sm.Run()
//i := 0
//go func() {
// for {
// i++
// time.Sleep(time.Second * 2)
// deviceId := fmt.Sprintf("%d", i)
// pd := &common_models.ProcessData{
// DeviceData: common_models.DeviceData{
// DeviceId: deviceId,
// Name: "Name-" + deviceId,
// ThingId: "",
// StructId: 0,
// TaskId: "",
// AcqTime: time.Time{},
// RealTime: time.Time{},
// ErrCode: 0,
// Raw: nil,
// DeviceInfo: common_models.DeviceInfo{},
// DimensionId: "",
// },
// Stations: []common_models.Station{},
// }
// source <- pd
// log.Printf("进入数据 %d", i)
// }
//}()
//for {
// sinkData := <-sm.out
// log.Printf("最终数据 %s", sinkData.DeviceData.Name)
//}
}
func printDeviceId(p *common_models.ProcessData) *common_models.ProcessData {
