Compare commits
17 Commits: 8353021d0e ... 3a1ef52537

SHA1 | Date
---|---
3a1ef52537 | 2 months ago
c6f66e56ce | 2 months ago
da861825af | 2 months ago
4b99b58420 | 2 months ago
e35519c889 | 2 months ago
ecf0f09c4d | 2 months ago
4f00694cea | 2 months ago
c14fc07f36 | 2 months ago
5274bf11bf | 2 months ago
18715f86c9 | 2 months ago
9d87b21b43 | 2 months ago
3e421eae60 | 2 months ago
35d6142e16 | 2 months ago
62668afdda | 2 months ago
a68d032eb8 | 2 months ago
7a07f7fe96 | 2 months ago
4b630acda1 | 2 months ago
43 changed files with 5470 additions and 874 deletions
@@ -0,0 +1,15 @@
module et_rpc

go 1.23.1

require (
    google.golang.org/grpc v1.69.4
    google.golang.org/protobuf v1.36.2
)

require (
    golang.org/x/net v0.30.0 // indirect
    golang.org/x/sys v0.26.0 // indirect
    golang.org/x/text v0.19.0 // indirect
    google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 // indirect
)
@@ -0,0 +1,32 @@
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY=
go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE=
go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE=
go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY=
go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk=
go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0=
go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc=
go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8=
go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys=
go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 h1:X58yt85/IXCx0Y3ZwN6sEIKZzQtDEYaBWrDvErdXrRE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI=
google.golang.org/grpc v1.69.4 h1:MF5TftSMkd8GLw/m0KM6V8CMOCY6NZ1NQDPGFgbTt4A=
google.golang.org/grpc v1.69.4/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4=
google.golang.org/protobuf v1.36.2 h1:R8FeyR1/eLmkutZOM5CWghmo5itiG9z0ktFlTVLuTmU=
google.golang.org/protobuf v1.36.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
File diff suppressed because it is too large
@@ -0,0 +1,349 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.5.1
// - protoc             v5.26.1
// source: et_rpc.proto

package pb

import (
    context "context"
    grpc "google.golang.org/grpc"
    codes "google.golang.org/grpc/codes"
    status "google.golang.org/grpc/status"
)

// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9

const (
    NodeService_HandleIotaData_FullMethodName = "/et_rpc.NodeService/HandleIotaData"
    NodeService_HandleAggData_FullMethodName  = "/et_rpc.NodeService/HandleAggData"
)

// NodeServiceClient is the client API for NodeService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// NodeService definition
type NodeServiceClient interface {
    // Handles Iota data and returns a node response
    HandleIotaData(ctx context.Context, in *HandleDataRequest, opts ...grpc.CallOption) (*HandleDataResponse, error)
    // Handles aggregated data and returns a node response
    HandleAggData(ctx context.Context, in *HandleDataRequest, opts ...grpc.CallOption) (*HandleDataResponse, error)
}

type nodeServiceClient struct {
    cc grpc.ClientConnInterface
}

func NewNodeServiceClient(cc grpc.ClientConnInterface) NodeServiceClient {
    return &nodeServiceClient{cc}
}

func (c *nodeServiceClient) HandleIotaData(ctx context.Context, in *HandleDataRequest, opts ...grpc.CallOption) (*HandleDataResponse, error) {
    cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
    out := new(HandleDataResponse)
    err := c.cc.Invoke(ctx, NodeService_HandleIotaData_FullMethodName, in, out, cOpts...)
    if err != nil {
        return nil, err
    }
    return out, nil
}

func (c *nodeServiceClient) HandleAggData(ctx context.Context, in *HandleDataRequest, opts ...grpc.CallOption) (*HandleDataResponse, error) {
    cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
    out := new(HandleDataResponse)
    err := c.cc.Invoke(ctx, NodeService_HandleAggData_FullMethodName, in, out, cOpts...)
    if err != nil {
        return nil, err
    }
    return out, nil
}

// NodeServiceServer is the server API for NodeService service.
// All implementations must embed UnimplementedNodeServiceServer
// for forward compatibility.
//
// NodeService definition
type NodeServiceServer interface {
    // Handles Iota data and returns a node response
    HandleIotaData(context.Context, *HandleDataRequest) (*HandleDataResponse, error)
    // Handles aggregated data and returns a node response
    HandleAggData(context.Context, *HandleDataRequest) (*HandleDataResponse, error)
    mustEmbedUnimplementedNodeServiceServer()
}

// UnimplementedNodeServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedNodeServiceServer struct{}

func (UnimplementedNodeServiceServer) HandleIotaData(context.Context, *HandleDataRequest) (*HandleDataResponse, error) {
    return nil, status.Errorf(codes.Unimplemented, "method HandleIotaData not implemented")
}
func (UnimplementedNodeServiceServer) HandleAggData(context.Context, *HandleDataRequest) (*HandleDataResponse, error) {
    return nil, status.Errorf(codes.Unimplemented, "method HandleAggData not implemented")
}
func (UnimplementedNodeServiceServer) mustEmbedUnimplementedNodeServiceServer() {}
func (UnimplementedNodeServiceServer) testEmbeddedByValue()                     {}

// UnsafeNodeServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to NodeServiceServer will
// result in compilation errors.
type UnsafeNodeServiceServer interface {
    mustEmbedUnimplementedNodeServiceServer()
}

func RegisterNodeServiceServer(s grpc.ServiceRegistrar, srv NodeServiceServer) {
    // If the following call panics, it indicates UnimplementedNodeServiceServer was
    // embedded by pointer and is nil. This will cause panics if an
    // unimplemented method is ever invoked, so we test this at initialization
    // time to prevent it from happening at runtime later due to I/O.
    if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
        t.testEmbeddedByValue()
    }
    s.RegisterService(&NodeService_ServiceDesc, srv)
}

func _NodeService_HandleIotaData_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
    in := new(HandleDataRequest)
    if err := dec(in); err != nil {
        return nil, err
    }
    if interceptor == nil {
        return srv.(NodeServiceServer).HandleIotaData(ctx, in)
    }
    info := &grpc.UnaryServerInfo{
        Server:     srv,
        FullMethod: NodeService_HandleIotaData_FullMethodName,
    }
    handler := func(ctx context.Context, req interface{}) (interface{}, error) {
        return srv.(NodeServiceServer).HandleIotaData(ctx, req.(*HandleDataRequest))
    }
    return interceptor(ctx, in, info, handler)
}

func _NodeService_HandleAggData_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
    in := new(HandleDataRequest)
    if err := dec(in); err != nil {
        return nil, err
    }
    if interceptor == nil {
        return srv.(NodeServiceServer).HandleAggData(ctx, in)
    }
    info := &grpc.UnaryServerInfo{
        Server:     srv,
        FullMethod: NodeService_HandleAggData_FullMethodName,
    }
    handler := func(ctx context.Context, req interface{}) (interface{}, error) {
        return srv.(NodeServiceServer).HandleAggData(ctx, req.(*HandleDataRequest))
    }
    return interceptor(ctx, in, info, handler)
}

// NodeService_ServiceDesc is the grpc.ServiceDesc for NodeService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var NodeService_ServiceDesc = grpc.ServiceDesc{
    ServiceName: "et_rpc.NodeService",
    HandlerType: (*NodeServiceServer)(nil),
    Methods: []grpc.MethodDesc{
        {
            MethodName: "HandleIotaData",
            Handler:    _NodeService_HandleIotaData_Handler,
        },
        {
            MethodName: "HandleAggData",
            Handler:    _NodeService_HandleAggData_Handler,
        },
    },
    Streams:  []grpc.StreamDesc{},
    Metadata: "et_rpc.proto",
}

const (
    MasterService_RegisterNode_FullMethodName   = "/et_rpc.MasterService/RegisterNode"
    MasterService_HeartbeatNode_FullMethodName  = "/et_rpc.MasterService/HeartbeatNode"
    MasterService_UnregisterNode_FullMethodName = "/et_rpc.MasterService/UnregisterNode"
)

// MasterServiceClient is the client API for MasterService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// MasterService definition
type MasterServiceClient interface {
    RegisterNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error)
    HeartbeatNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error)
    UnregisterNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error)
}

type masterServiceClient struct {
    cc grpc.ClientConnInterface
}

func NewMasterServiceClient(cc grpc.ClientConnInterface) MasterServiceClient {
    return &masterServiceClient{cc}
}

func (c *masterServiceClient) RegisterNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error) {
    cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
    out := new(RpcResponse)
    err := c.cc.Invoke(ctx, MasterService_RegisterNode_FullMethodName, in, out, cOpts...)
    if err != nil {
        return nil, err
    }
    return out, nil
}

func (c *masterServiceClient) HeartbeatNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error) {
    cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
    out := new(RpcResponse)
    err := c.cc.Invoke(ctx, MasterService_HeartbeatNode_FullMethodName, in, out, cOpts...)
    if err != nil {
        return nil, err
    }
    return out, nil
}

func (c *masterServiceClient) UnregisterNode(ctx context.Context, in *NodeRequest, opts ...grpc.CallOption) (*RpcResponse, error) {
    cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
    out := new(RpcResponse)
    err := c.cc.Invoke(ctx, MasterService_UnregisterNode_FullMethodName, in, out, cOpts...)
    if err != nil {
        return nil, err
    }
    return out, nil
}

// MasterServiceServer is the server API for MasterService service.
// All implementations must embed UnimplementedMasterServiceServer
// for forward compatibility.
//
// MasterService definition
type MasterServiceServer interface {
    RegisterNode(context.Context, *NodeRequest) (*RpcResponse, error)
    HeartbeatNode(context.Context, *NodeRequest) (*RpcResponse, error)
    UnregisterNode(context.Context, *NodeRequest) (*RpcResponse, error)
    mustEmbedUnimplementedMasterServiceServer()
}

// UnimplementedMasterServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedMasterServiceServer struct{}

func (UnimplementedMasterServiceServer) RegisterNode(context.Context, *NodeRequest) (*RpcResponse, error) {
    return nil, status.Errorf(codes.Unimplemented, "method RegisterNode not implemented")
}
func (UnimplementedMasterServiceServer) HeartbeatNode(context.Context, *NodeRequest) (*RpcResponse, error) {
    return nil, status.Errorf(codes.Unimplemented, "method HeartbeatNode not implemented")
}
func (UnimplementedMasterServiceServer) UnregisterNode(context.Context, *NodeRequest) (*RpcResponse, error) {
    return nil, status.Errorf(codes.Unimplemented, "method UnregisterNode not implemented")
}
func (UnimplementedMasterServiceServer) mustEmbedUnimplementedMasterServiceServer() {}
func (UnimplementedMasterServiceServer) testEmbeddedByValue()                       {}

// UnsafeMasterServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to MasterServiceServer will
// result in compilation errors.
type UnsafeMasterServiceServer interface {
    mustEmbedUnimplementedMasterServiceServer()
}

func RegisterMasterServiceServer(s grpc.ServiceRegistrar, srv MasterServiceServer) {
    // If the following call panics, it indicates UnimplementedMasterServiceServer was
    // embedded by pointer and is nil. This will cause panics if an
    // unimplemented method is ever invoked, so we test this at initialization
    // time to prevent it from happening at runtime later due to I/O.
    if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
        t.testEmbeddedByValue()
    }
    s.RegisterService(&MasterService_ServiceDesc, srv)
}

func _MasterService_RegisterNode_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
    in := new(NodeRequest)
    if err := dec(in); err != nil {
        return nil, err
    }
    if interceptor == nil {
        return srv.(MasterServiceServer).RegisterNode(ctx, in)
    }
    info := &grpc.UnaryServerInfo{
        Server:     srv,
        FullMethod: MasterService_RegisterNode_FullMethodName,
    }
    handler := func(ctx context.Context, req interface{}) (interface{}, error) {
        return srv.(MasterServiceServer).RegisterNode(ctx, req.(*NodeRequest))
    }
    return interceptor(ctx, in, info, handler)
}

func _MasterService_HeartbeatNode_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
    in := new(NodeRequest)
    if err := dec(in); err != nil {
        return nil, err
    }
    if interceptor == nil {
        return srv.(MasterServiceServer).HeartbeatNode(ctx, in)
    }
    info := &grpc.UnaryServerInfo{
        Server:     srv,
        FullMethod: MasterService_HeartbeatNode_FullMethodName,
    }
    handler := func(ctx context.Context, req interface{}) (interface{}, error) {
        return srv.(MasterServiceServer).HeartbeatNode(ctx, req.(*NodeRequest))
    }
    return interceptor(ctx, in, info, handler)
}

func _MasterService_UnregisterNode_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
    in := new(NodeRequest)
    if err := dec(in); err != nil {
        return nil, err
    }
    if interceptor == nil {
        return srv.(MasterServiceServer).UnregisterNode(ctx, in)
    }
    info := &grpc.UnaryServerInfo{
        Server:     srv,
        FullMethod: MasterService_UnregisterNode_FullMethodName,
    }
    handler := func(ctx context.Context, req interface{}) (interface{}, error) {
        return srv.(MasterServiceServer).UnregisterNode(ctx, req.(*NodeRequest))
    }
    return interceptor(ctx, in, info, handler)
}

// MasterService_ServiceDesc is the grpc.ServiceDesc for MasterService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var MasterService_ServiceDesc = grpc.ServiceDesc{
    ServiceName: "et_rpc.MasterService",
    HandlerType: (*MasterServiceServer)(nil),
    Methods: []grpc.MethodDesc{
        {
            MethodName: "RegisterNode",
            Handler:    _MasterService_RegisterNode_Handler,
        },
        {
            MethodName: "HeartbeatNode",
            Handler:    _MasterService_HeartbeatNode_Handler,
        },
        {
            MethodName: "UnregisterNode",
            Handler:    _MasterService_UnregisterNode_Handler,
        },
    },
    Streams:  []grpc.StreamDesc{},
    Metadata: "et_rpc.proto",
}
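To make the generated contract concrete, here is a minimal sketch of a node-side server that satisfies NodeServiceServer by embedding UnimplementedNodeServiceServer by value, as the generated code requires. The handler bodies and the listen port are illustrative assumptions, not part of the generated file.

package main

import (
    "context"
    "log"
    "net"

    "et_rpc/pb"
    "google.golang.org/grpc"
)

// nodeServer embeds UnimplementedNodeServiceServer by value for forward compatibility.
type nodeServer struct {
    pb.UnimplementedNodeServiceServer
}

// HandleIotaData acknowledges the batch; real processing is omitted in this sketch.
func (s *nodeServer) HandleIotaData(ctx context.Context, req *pb.HandleDataRequest) (*pb.HandleDataResponse, error) {
    log.Printf("received %d raw messages for id=%s", len(req.Messages), req.Id)
    return &pb.HandleDataResponse{Status: pb.HandleDataResponse_SUCCESS}, nil
}

// HandleAggData behaves the same way for aggregated data.
func (s *nodeServer) HandleAggData(ctx context.Context, req *pb.HandleDataRequest) (*pb.HandleDataResponse, error) {
    return &pb.HandleDataResponse{Status: pb.HandleDataResponse_SUCCESS}, nil
}

func main() {
    lis, err := net.Listen("tcp", ":40000") // placeholder port
    if err != nil {
        log.Fatalf("listen: %v", err)
    }
    s := grpc.NewServer()
    pb.RegisterNodeServiceServer(s, &nodeServer{})
    if err := s.Serve(lis); err != nil {
        log.Fatalf("serve: %v", err)
    }
}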
@@ -0,0 +1,124 @@
syntax = "proto3";

package et_rpc;
option go_package = "/pb";

message NodeArgs {
    string id = 1;             // node ID
    string addr = 2;           // node address
    int32 load = 3;            // node load
    string resource_json = 4;  // resource usage as JSON
    int32 weight = 5;          // weight
    NodeState status = 6;      // node state
    RPCReplyCode err_code = 7; // error code
    string err_message = 8;    // error message
}

message NodeResponse {
    string id = 1;
    string addr = 2;
    RPCReplyCode err_code = 3;
    string err_message = 4;
}

message NodeRegistrationRequest {
    string node_id = 1;
    string node_addr = 2;
}

message NodeStatusRequest {
    string node_id = 1;
}

message NodeStatusResponse {
    string node_id = 1;
    string status = 2; // e.g. "active", "inactive"
}

// Enum definitions
enum NodeState {
    UNKNOWN = 0;
    ACTIVE = 1;
    INACTIVE = 2;
}

enum RPCReplyCode {
    SUCCESS = 0;
    FAILURE = 1;
}

// Key-value pair message
message KeyValue {
    string key = 1;   // key
    double value = 2; // value
}

// AggData message
message AggData {
    string date = 1;          // timestamp (carried as a string)
    int32 sensor_id = 2;      // SensorId
    int32 struct_id = 3;      // StructId
    int32 factor_id = 4;      // FactorId
    int32 agg_type_id = 5;    // aggregation type
    int32 agg_method_id = 6;  // aggregation method
    repeated KeyValue agg = 7;     // aggregated data
    repeated KeyValue changed = 8; // deltas
    string thing_id = 9;      // ThingId
}

message NodeRequest {
    string id = 1;
    string address = 2;
    repeated string thing_ids = 3;
}

message RpcResponse {
    enum Status {
        SUCCESS = 0;          // request succeeded
        FAILURE = 1;          // request failed
        INVALID_ARGUMENT = 2; // invalid argument
        NOT_FOUND = 3;        // not found
        INTERNAL_ERROR = 4;   // internal error
        // more states can be added as needed
    }

    Status status = 1;        // request status
    string error_message = 2; // error details, returned when the request fails
}

message HandleDataResponse {
    enum Status {
        SUCCESS = 0;          // request succeeded
        FAILURE = 1;          // request failed
        INVALID_ARGUMENT = 2; // invalid argument
        INTERNAL_ERROR = 4;   // internal error
        // more states can be added as needed
    }

    string addr = 1;          // node address
    int32 load = 2;           // node load
    Status status = 3;        // request status
    string error_message = 4; // error details, returned when the request fails
}

message HandleDataRequest {
    string id = 1;
    repeated string messages = 2;
}

// NodeService definition
service NodeService {
    // Handles Iota data and returns a node response
    rpc HandleIotaData(HandleDataRequest) returns (HandleDataResponse);
    // Handles aggregated data and returns a node response
    rpc HandleAggData(HandleDataRequest) returns (HandleDataResponse);
}

// MasterService definition
service MasterService {
    rpc RegisterNode(NodeRequest) returns (RpcResponse);
    rpc HeartbeatNode(NodeRequest) returns (RpcResponse);
    rpc UnregisterNode(NodeRequest) returns (RpcResponse);
}
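For reference, a client built against this contract reduces to a dial plus calls on the generated stubs. The sketch below is illustrative: the target address, timeout, and payload are assumptions, not values from this repository.

package main

import (
    "context"
    "log"
    "time"

    "et_rpc/pb"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

func main() {
    // Dial the node service; the address is a placeholder.
    conn, err := grpc.NewClient("127.0.0.1:40000", grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatalf("dial: %v", err)
    }
    defer conn.Close()

    client := pb.NewNodeServiceClient(conn)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    // Messages are opaque JSON strings, matching HandleDataRequest above.
    resp, err := client.HandleIotaData(ctx, &pb.HandleDataRequest{
        Id:       "thing-1",
        Messages: []string{`{"id":"1","value":10.5}`},
    })
    if err != nil {
        log.Fatalf("HandleIotaData: %v", err)
    }
    log.Printf("node %s replied with status %s, load %d", resp.Addr, resp.Status, resp.Load)
}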
@@ -0,0 +1,58 @@
gRPC: https://grpc.org.cn/docs/guides/cancellation/

go get google.golang.org/grpc
go get google.golang.org/protobuf/cmd/protoc-gen-go
go get google.golang.org/grpc/cmd/protoc-gen-go-grpc

go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest

Troubleshooting code-generation errors:
Find the relevant modules:
go list -m all | findstr grpc
go list -m all | findstr protobuf

Remove unnecessary dependencies:
go get -u github.com/gogo/protobuf@none
go get -u github.com/golang/protobuf@none
go get -u github.com/matttproud/golang_protobuf_extensions@none

Update go.mod:
go mod tidy

Regenerate the code; open a shell and run:
cd et_rpc
protoc --proto_path=proto --go_out=./pb --go_opt=paths=source_relative --go-grpc_out=./pb --go-grpc_opt=paths=source_relative proto/*.proto

==============================================================

1. Generating Go code with the Protobuf compiler protoc
Basic protoc invocation: protoc --proto_path=IMPORT_PATH --go_out=OUTPUT_PATH --go_opt=paths=source_relative your_proto_file.proto
Options:
--proto_path=IMPORT_PATH: search path for .proto files. Multiple --proto_path options may be given.
--go_out=OUTPUT_PATH: output path for the generated Go code. Use . for the current directory.
--go_opt=paths=source_relative: makes the generated Go files' package paths mirror the relative paths of the .proto files; usually recommended.
your_proto_file.proto: the .proto file to compile.

***** Build commands for the ET-GO system (run after cd into et-go/et_rpc)
1) Generate only the message types, their (de)serialization methods, and other helpers:
protoc --proto_path=proto --go_out=./pb --go_opt=paths=source_relative proto/*.proto
2) Generate the message types, their (de)serialization methods, the gRPC service code, and the gRPC method stubs:
protoc --proto_path=proto --go_out=./pb --go_opt=paths=source_relative --go-grpc_out=./pb --go-grpc_opt=paths=source_relative proto/*.proto

Options:
--proto_path=proto: search for .proto files in the proto directory.
--go_out=./pb: write the generated Go code to the pb directory.
--go_opt=paths=source_relative: keep the generated Go files' package paths consistent with the .proto files' relative paths.
proto/*.proto: compile all .proto files under the proto directory via the *.proto wildcard.

/et-go
├── et_rpc
│   ├── pb                  # generated Go files
│   │   └── iota_data.pb.go # generated Go file
│   └── proto               # Protobuf files
│       └── iota_data.proto # IotaData Protobuf file
└── ...
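One way to keep these protoc invocations next to the code is a go:generate directive, so regeneration becomes `go generate ./...`. This is a sketch under the directory layout described above; the directive and file name are assumptions, not something present in this repository.

// Package et_rpc regenerates its gRPC bindings via `go generate ./...`.
//
//go:generate protoc --proto_path=proto --go_out=./pb --go_opt=paths=source_relative --go-grpc_out=./pb --go-grpc_opt=paths=source_relative proto/et_rpc.proto
package et_rpc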
@@ -0,0 +1,22 @@
package et_rpc

type RPCReplyCode int

type NodeState int

const (
    NodeState_Healthy NodeState = iota
    NodeState_Unhealthy
)

type NodeArgs struct {
    ID           string
    Addr         string
    Load         int       // node load (mainly the backlog of pending data)
    ResourceJson string    // CPU / memory / disk usage as JSON
    Weight       int       // weight (currently unused)
    Status       NodeState // node state (healthy | unhealthy)

    ErrCode    RPCReplyCode // RPCReply_Success | RPCReply_Failure
    ErrMessage string
}
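Since this struct mirrors the NodeArgs message in et_rpc.proto, a small conversion helper is the natural bridge between the two. The sketch below is illustrative only; it assumes the Go constants stay aligned with the proto enum values, which nothing here enforces.

package et_rpc

import "et_rpc/pb"

// ToProto converts the local NodeArgs struct into its Protobuf counterpart.
// The int-to-enum casts are an assumption of this sketch: they rely on the
// Go constants and the proto enums keeping the same numeric values.
func (a *NodeArgs) ToProto() *pb.NodeArgs {
    return &pb.NodeArgs{
        Id:           a.ID,
        Addr:         a.Addr,
        Load:         int32(a.Load),
        ResourceJson: a.ResourceJson,
        Weight:       int32(a.Weight),
        Status:       pb.NodeState(a.Status),
        ErrCode:      pb.RPCReplyCode(a.ErrCode),
        ErrMessage:   a.ErrMessage,
    }
}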
@@ -0,0 +1,57 @@
package et_rpc

import (
    "encoding/json"
    "et_rpc/pb"
    "fmt"
    "testing"

    "google.golang.org/protobuf/proto"
)

// TestProtoJSONConversion tests the conversion between Protobuf and JSON
func TestProtoJSONConversion(t *testing.T) {
    // build the request
    request := createHandleDataRequest()

    // serialize the request into Protobuf wire format
    data, err := proto.Marshal(request)
    if err != nil {
        t.Fatalf("Failed to marshal request: %v", err)
    }

    // print the serialized data
    fmt.Println("Serialized HandleDataRequest:", data)

    // The serialized data could now be sent to a gRPC service,
    // e.g.: client.HandleIotaData(context.Background(), request)
}

func createJSONData(id string, value float64) (string, error) {
    data := map[string]interface{}{
        "id":    id,
        "value": value,
    }

    jsonData, err := json.Marshal(data)
    if err != nil {
        return "", err
    }

    return string(jsonData), nil
}

func createHandleDataRequest() *pb.HandleDataRequest {
    request := &pb.HandleDataRequest{}

    // append JSON data messages
    json1, _ := createJSONData("1", 10.5)
    json2, _ := createJSONData("2", 20.3)
    json3, _ := createJSONData("3", 15.8)

    request.Messages = append(request.Messages, json1)
    request.Messages = append(request.Messages, json2)
    request.Messages = append(request.Messages, json3)

    return request
}
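The marshal-only test above never verifies decoding. A round-trip check is a small addition that would fit in the same file, since the pb, proto, and testing imports are already present; the test name is an assumption of this sketch.

// TestProtoRoundTrip marshals a request and unmarshals it back,
// verifying the two messages are semantically equal.
func TestProtoRoundTrip(t *testing.T) {
    request := createHandleDataRequest()

    data, err := proto.Marshal(request)
    if err != nil {
        t.Fatalf("marshal: %v", err)
    }

    decoded := &pb.HandleDataRequest{}
    if err := proto.Unmarshal(data, decoded); err != nil {
        t.Fatalf("unmarshal: %v", err)
    }

    if !proto.Equal(request, decoded) {
        t.Errorf("round trip mismatch: got %+v, want %+v", decoded, request)
    }
}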
@@ -1,403 +1,398 @@
package app

import (
    "dataSource"
    "encoding/gob"
    "errors"
    "et_prometheus_exporter"
    "et_rpc/pb"
    "fmt"
    "gitea.anxinyun.cn/container/common_models"
    "gitea.anxinyun.cn/container/common_utils/configLoad"
    "github.com/panjf2000/ants/v2"
    "google.golang.org/grpc"
    "google.golang.org/grpc/health"
    "google.golang.org/grpc/health/grpc_health_v1"
    "log"
    "math"
    "master/data_source"
    "master/node_manager"
    "net"
    "net/rpc"
    "strings"
    "os"
    "os/signal"
    "sync"
    "sync/atomic"
    "syscall"
    "time"
)

type EtMaster struct {
    nodeMap  sync.Map
    exporter et_prometheus_exporter.PrometheusExporter
    sleepCH  chan bool
type SendStatus struct {
    inProgressCount int32 // count of messages currently being processed
    limitThreshold  int32 // throttling threshold
    receiving       int32 // whether to accept messages (1 = accepting, 0 = paused)
}

func NewEtMaster() *EtMaster {
    master := EtMaster{
        exporter: et_prometheus_exporter.NewPrometheusExporter(),
        sleepCH:  make(chan bool, 1),
    }
    return &master
}
// ETMaster manages the Master's core logic
type ETMaster struct {
    nodeManager      *node_manager.NodeManager
    grpcServer       *grpc.Server
    masterRPCService *MasterRPCService

type NodeRpc struct {
    args        *common_models.NodeArgs // node registration args: input of service method NodeRegister on the RPC service named "master"
    resultCH    chan int                // node registration args: output of service method NodeRegister on the RPC service named "master"
    aggResultCH chan int                // result returned after aggregated data is processed; corresponds to the Reply parameter
    client      *rpc.Client
    dataSource      *data_source.KafkaDataSource
    aggDataHandlers sync.Map   // aggregated-data handlers
    rawDataHandlers sync.Map   // raw-data handlers
    aggSendStatus   SendStatus // aggregated-data send status
    rawSendStatus   SendStatus // raw-data send status

    errRawChan               chan []string
    errMessagesKafkaProducer *data_source.KafkaProducer // Kafka producer used to publish failed messages
}

// RegisterListen starts the master RPC service
func (the *EtMaster) RegisterListen() {
    // listen
    err := rpc.RegisterName("master", the)
    if err != nil {
        log.Println("master: registration service error")
        return
// NewETMaster creates an ETMaster instance
func NewETMaster() *ETMaster {
    lb := node_manager.NewLoadBalancer(&node_manager.RoundRobinSelector{})
    nodeManager := node_manager.NewNodeManager(lb)

    grpcServer := grpc.NewServer()
    masterRPCService := NewMasterRPCService(nodeManager)
    pb.RegisterMasterServiceServer(grpcServer, masterRPCService)

    healthServer := health.NewServer()
    grpc_health_v1.RegisterHealthServer(grpcServer, healthServer)
    healthServer.SetServingStatus("MasterService", grpc_health_v1.HealthCheckResponse_SERVING)

    return &ETMaster{
        nodeManager:      nodeManager,
        grpcServer:       grpcServer,
        masterRPCService: masterRPCService,
    }
}

func (mm *ETMaster) StartRPCServer() {
    port := configLoad.LoadConfig().GetUint16("master.port")

    listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
    if err != nil {
        log.Panic("master: failed to start the node registration service")
        log.Panicf("failed to start the Master RPC service: %v", err)
    }
    log.Printf("master: node registration service started on port %d", port)
    for {
        //log.Println("master: listening for new registration connections")
        conn, err := listener.Accept()
        if err != nil {
            log.Println("master rpc: Accept error")
    defer func() {
        if err := listener.Close(); err != nil {
            log.Printf("failed to close the listener: %v", err)
        }
        log.Printf("master: accepted registration connection from node[%s]", conn.RemoteAddr())
        go rpc.ServeConn(conn)
    }()
    log.Printf("Master RPC service started successfully on port %d", port)

    // start the gRPC server
    if err := mm.grpcServer.Serve(listener); err != nil {
        log.Panicf("gRPC server failed: %v", err)
    }
}

// DistributeData distributes data.
// It listens on the two data channels RawDataChan and AggDataChan and calls notifyData to handle whatever arrives on either channel.
func (the *EtMaster) DistributeData(dataChannels *dataSource.DataChannels) {
    // register data types with gob
    gob.Register([]interface{}{})
    for {
        log.Printf("L74 nodeCount: %d", the.nodeMapCount())
        if the.nodeMapCount() == 0 {
            log.Printf("nodeList is empty!")
            time.Sleep(time.Second * 10)
            continue
        }
// InitKafkaDataSource initializes the Kafka data source
func (mm *ETMaster) InitKafkaDataSource() {
    ds := data_source.NewKafkaDataSource() // loads the Kafka-related configuration

        select {
        case stopEnable := <-the.sleepCH:
            log.Printf("L83 nodeCount: %d", the.nodeMapCount())
            if stopEnable {
                stopTime := time.Second * 10
                log.Printf("nodes are working through a backlog (%v); master pausing for %v", stopEnable, stopTime)
                time.Sleep(stopTime)
            } else {
                log.Printf("nodes are working through a backlog (%v); unexpected empty data", stopEnable)
            }
        default:
    // create the Kafka producer instance
    producer, err := data_source.NewKafkaProducer(ds.Brokers)
    if err != nil {
        log.Fatalf("failed to create the Kafka producer: %v", err)
    }
    mm.errMessagesKafkaProducer = producer

    // set up the rawData handlers, one per partition
    if ds.Master_kafkaConsumer_config.RawData != nil {
        topicCfg := ds.Topics["data_raw"]
        for partId := 0; partId < topicCfg.Partitions; partId++ {
            key := fmt.Sprintf("%s_%d", topicCfg.Topic, partId)
            dataHandler := data_source.NewRawDataHandler(key, topicCfg.Topic, partId)
            mm.rawDataHandlers.Store(key, dataHandler)
        }

        select {
        case data := <-dataChannels.RawDataChan:
            log.Printf("L96 nodeCount: %d", the.nodeMapCount())
            the.notifyData(&data, the.callNodeService)
        case data := <-dataChannels.AggDataChan:
            log.Printf("L99 nodeCount: %d", the.nodeMapCount())
            the.notifyData(&data, the.callNodeService)
        //default:
        //    time.Sleep(100 * time.Millisecond)
        // failed messages are stored in the DLP_DATA_RAW topic
        dlpKey := "DLP_DATA_RAW"
        mm.rawDataHandlers.Store(dlpKey, data_source.NewRawDataHandler(dlpKey, dlpKey, 0))
    }

    // set up the aggData handlers, one per partition
    if ds.Master_kafkaConsumer_config.AggData != nil {
        topicCfg := ds.Topics["data_agg"]
        for partId := 0; partId < topicCfg.Partitions; partId++ {
            key := fmt.Sprintf("%s_%d", topicCfg.Topic, partId)
            dataHandler := data_source.NewAggDataHandler(key, topicCfg.Topic, partId)
            mm.aggDataHandlers.Store(key, dataHandler)
        }

        // failed messages are stored in the DLP_DATA_AGG topic
        dlpKey := "DLP_DATA_AGG"
        mm.rawDataHandlers.Store(dlpKey, data_source.NewRawDataHandler(dlpKey, dlpKey, 0))
    }

    ds.RawDataHandlers = &mm.rawDataHandlers
    ds.AggDataHandlers = &mm.aggDataHandlers
    mm.dataSource = ds
}

// WaitNodeRegister waits for node registration
func (mm *ETMaster) WaitNodeRegister() {
    log.Println("==== waiting for Node registration ====")
    for mm.masterRPCService.nodeManager.NodesCount() == 0 {
        time.Sleep(time.Second * 10)
    }
}
func (the *EtMaster) notifyData(data common_models.IDataTrace, callNodeFunc func(*NodeRpc, common_models.IDataTrace)) {
    thingId := data.GetThingId()
    isMatch := false
    the.nodeMap.Range(func(address, value interface{}) bool {
        if nodePtr, ok := value.(*NodeRpc); ok {
            if nodePtr != nil {
                if contains(nodePtr.args.ThingIds, thingId) {
                    isMatch = true
                    go callNodeFunc(nodePtr, data)
                    return false

// AggDataPublishing publishes aggregated data
func (mm *ETMaster) AggDataPublishing() {
    concurrency := configLoad.LoadConfig().GetInt32("performance.master.rpc.concurrency") // number of concurrent requests, 50
    mm.initSendStatus(&mm.aggSendStatus, concurrency)
    go mm.monitorSendStatus(&mm.aggSendStatus, "aggSendStatus")
    mm.startDataPublishing(&mm.aggDataHandlers, "AggData", mm.sendAggData, &mm.aggSendStatus)
}

// RawDataPublishing publishes raw data
func (mm *ETMaster) RawDataPublishing() {
    concurrency := configLoad.LoadConfig().GetInt32("performance.master.rpc.concurrency") // number of concurrent requests, 50
    mm.initSendStatus(&mm.rawSendStatus, concurrency)
    go mm.monitorSendStatus(&mm.rawSendStatus, "rawSendStatus")
    mm.startDataPublishing(&mm.rawDataHandlers, "RawData", mm.sendRawData, &mm.rawSendStatus)
}

// initSendStatus initializes the send status
func (mm *ETMaster) initSendStatus(status *SendStatus, threshold int32) {
    status.limitThreshold = threshold
    atomic.StoreInt32(&status.receiving, 1)
}

// startDataPublishing starts data publishing
func (mm *ETMaster) startDataPublishing(handlers *sync.Map, handlerType string, sendFunc func(string, []string) error, status *SendStatus) {
    // create a goroutine pool with a maximum concurrency of 500
    pool, err := ants.NewPool(500)
    if err != nil {
        log.Fatalf("failed to create the goroutine pool: %v", err)
    }

    var wg sync.WaitGroup
    index := 0
    handlers.Range(func(key, value any) bool {
        handler := value.(data_source.IMessageHandler)
        dataChannel := handler.GetDataChannel()
        log.Printf("starting [%s-Publishing] goroutine, Handler%d, dataChannel[%p] capacity: %d", handlerType, index, dataChannel, cap(dataChannel))

        wg.Add(1)
        go func(idx int) {
            defer wg.Done()

            for {
                // check whether intake is paused
                if atomic.LoadInt32(&status.receiving) == 0 {
                    log.Printf("%sHandler%d: intake paused, waiting for in-flight messages", handlerType, idx)
                    time.Sleep(100 * time.Millisecond)
                    continue
                }

                select {
                case d, ok := <-dataChannel: // check whether dataChannel has been closed
                    if !ok {
                        log.Printf("%sHandler%d: dataChannel closed, exiting goroutine", handlerType, idx)
                        return // exit the goroutine
                    }

                    data := d
                    atomic.AddInt32(&status.inProgressCount, 1)
                    log.Printf("[%s-Publishing] inProgressCount=%d. Handler%d about to send [%d] messages, dataChannel[%p] current length: %d/%d",
                        handlerType, atomic.LoadInt32(&status.inProgressCount), idx, len(data.Messages), dataChannel, len(dataChannel), cap(dataChannel))

                    // submit the task via ants
                    poolErr := pool.Submit(func() {
                        startTime := time.Now()
                        defer atomic.AddInt32(&status.inProgressCount, -1) // decrement the counter when the task completes

                        if err := sendFunc(data.Id, data.Messages); err != nil {
                            log.Printf("%sHandler%d: failed to send data: %v. Elapsed: %v", handlerType, idx, err, time.Since(startTime))
                            // publish the failed data to Kafka (via the goroutine pool)
                            _ = pool.Submit(func() {
                                mm.errMessagesKafkaProducer.SendStringArrayMessage(fmt.Sprintf("DLP_%s", handlerType), data.Id, data.Messages)
                            })
                        } else {
                            log.Printf("[%s-Publishing] goroutine, Handler%d sent [%d] messages. Elapsed: %v, dataChannel[%p] current length: %d/%d",
                                handlerType, idx, len(data.Messages), time.Since(startTime), dataChannel, len(dataChannel), cap(dataChannel))
                        }
                    })

                    if poolErr != nil {
                        log.Printf("%sHandler%d: failed to submit the task to the goroutine pool: %v", handlerType, idx, poolErr)
                        atomic.AddInt32(&status.inProgressCount, -1) // decrement the counter when submission fails
                    }

                default:
                    // if dataChannel is empty, wait a little while
                    time.Sleep(10 * time.Millisecond)
                }
            }
        }(index)

        index++
        return true
    })

    // no match triggers a rebalance
    if !isMatch {
        nodePtr := the.getNodeWithMinThings()
        if nodePtr != nil {
            nodePtr.args.ThingIds = append(nodePtr.args.ThingIds, thingId)
            log.Printf("thingId [%s] assigned to node [%s]", thingId, nodePtr.args.Addr)
            go callNodeFunc(nodePtr, data)
        }
    }
    wg.Wait()
    defer pool.Release() // make sure the pool is released when the function exits
}

// callNodeService invokes etNode's RPC service
func (the *EtMaster) callNodeService(node *NodeRpc, data common_models.IDataTrace) {
    if node.client == nil {
        log.Printf("node [%v] client=nil", node.args)
        return
    }

    var serviceMethod = ""
    var resultCH chan int
    var v interface{}

    switch data.(type) {
    case *common_models.IotaData:
        v = data.(*common_models.IotaData)
        the.exporter.OnIotaData2metricByPrometheus(data.(*common_models.IotaData))
        serviceMethod = "etNode.IotaDataHandler"
        resultCH = node.resultCH
    case *common_models.AggData:
        v = data.(*common_models.AggData)
        serviceMethod = "etNode.AggDataHandler"
        resultCH = node.aggResultCH
    default:
        log.Printf("Unknown kafka data type:%v", v)
        return
    }

    log.Printf("RPC[%s] data pending on node: %+v \n", serviceMethod, v)
func (mm *ETMaster) sendRawData(thingId string, data []string) error {
    dataLog := fmt.Sprintf("thingId[%s], [%d] messages in total.", thingId, len(data))
    //log.Printf("[RawData-Publishing][sendRawData]1. start processing. %s", dataLog)

    go func() {
        defer timeCost(node.args.ID, data.Q(), time.Now())
        var reply bool
        err := node.client.Call(serviceMethod, data, &reply)
    var nodeConn *node_manager.NodeConnection
    var err error
    retry := 0

        result := boolToInt(reply)
    // try to obtain a NodeConnection
    for retry < 3 {
        startTime := time.Now()
        nodeConn, err = mm.nodeManager.GetNodeConnection()
        duration := time.Since(startTime) // time spent obtaining the connection
        log.Printf("[sendRawData]1. obtaining a NodeConnection took: %v", duration)

        if err != nil {
            isAggParseErr := strings.Contains(err.Error(), "aggData非法数据")
            log.Printf("master call to node failed. Error: %s", err.Error())
            if !isAggParseErr {
                // rpc call to node, err: read tcp 10.8.30.104:57230->10.8.30.104:40000: wsarecv: An existing connection was forcibly closed by the remote host.
                result = 2
            }
            log.Printf("[sendRawData]1. failed to obtain a NodeConnection, error: %v", err)
            //m.kafkaDS.StopConsumers() // TODO pause Kafka consumption
            //log.Println("============ Kafka consumption paused...")
            retry++
            time.Sleep(time.Duration(2<<retry) * time.Second) // exponential backoff
            continue
        }
        resultCH <- result
    }()

    // RPC call result
    errorCode := 0
    timeoutMills := 300 * 1000 * time.Millisecond // 5 minutes
    select {
    case reply := <-resultCH:
        // reply: 0 = the RPC returned false, 1 = the RPC returned true, 2 = RPC network error
        if reply == 2 {
            log.Printf("RPC[%s] node connection was closed. Unprocessed data *** %+v *** \n\n", serviceMethod, v)
            errorCode = 200
        } else if reply == 0 {
            //log.Printf("RPC[%s] node replied false. Data that failed processing *** %+v *** \n\n", serviceMethod, v)
            errorCode = 100
        }
    case <-time.After(timeoutMills):
        log.Printf("RPC[%s] node call timed out, exiting goroutine, timeout: %v. Unprocessed data *** %+v *** \n\n", serviceMethod, timeoutMills, v)
        errorCode = 300
        // TODO connection obtained, resume Kafka consumption and exit the loop
        //m.kafkaDS.ResumeConsumers()
        //log.Printf("[sendAggData] obtained a NodeConnection: %+v", nodeConn)
        break
    }

    // 100 fault: internal program error
    // 200 fault: network communication problem
    // 300 fault: processing timeout
    if errorCode >= 200 {
        the.errorHandle(errorCode, node.args.Addr, fmt.Sprintf("%s|%s", data.R(), data.T()))
    } else {
        //log.Printf("node[%s] replied true. Successfully processed data *** %+v *** \n\n", node.args.Addr, data.R(), data.T())
        //log.Printf("RPC[%s] node processed the data, errorCode=%d *** %+v *** \n\n", serviceMethod, errorCode, v)
        log.Printf("****** RPC[%s] node processed the data, errorCode=%d ****** \n\n", serviceMethod, errorCode)
    if err != nil || nodeConn == nil {
        log.Printf("[sendRawData]1. max retries reached, no healthy node connection available, error: %v", err)
        return err
    }
    }

// NodeRegister is an RPC service method invoked remotely by et_node
func (the *EtMaster) NodeRegister(nodeArgs *common_models.NodeArgs, reply *bool) error {
    node := &NodeRpc{
        args:        nodeArgs,
        resultCH:    make(chan int, 1),
        aggResultCH: make(chan int, 1),
        client:      nil,
    }
    // master initializes the node client
    client, err := rpc.Dial("tcp", nodeArgs.Addr)
    if err != nil {
        log.Printf("failed to connect to node -> node[%v]", nodeArgs.Addr)
        return err
    // record the time spent calling Node.ProcessData
    //defer LogProcessDataTimeCost(nodeConn.NArgs.Addr, "[]aggData", time.Now())
    // RPC call to Node.ProcessData, passing []*pb.AggData
    resultChan := make(chan error, 1)
    log.Printf("[sendRawData]2. starting RPC call [Node.HandleRawData] %s", dataLog)
    callStartTime := time.Now()
    callErr := nodeConn.CallHandleIotaData(thingId, data)
    log.Printf("<--[sendRawData]3. RPC call succeeded. Elapsed: %v, %s", time.Since(callStartTime), dataLog)
    resultChan <- callErr

    // set a timeout
    select {
    case callErr := <-resultChan:
        if callErr != nil {
            log.Printf("[sendRawData]4. RPC call finished with error: %+v, %s", callErr, dataLog)
            return callErr
        }
        //log.Printf("[sendRawData]4. RPC call succeeded")
    case <-time.After(5 * time.Minute): // set a timeout
        log.Printf("[sendRawData]4. request exceeded 5 minutes. %s", dataLog)
        return errors.New("request timed out after 5m")
    }

    node.client = client
    the.addOrUpdate(nodeArgs.Addr, node)
    log.Printf("node service [%v] registered successfully", nodeArgs)
    the.printNodes()
    *reply = true
    return nil
}

func (the *EtMaster) NodeHeart(nodeArgs *common_models.NodeArgs, reply *bool) error {
    if !the.clientIsValid(nodeArgs.Addr) {
        log.Printf("received heartbeat from unregistered node[%v]", nodeArgs)
        *reply = false
        err := the.NodeRegister(nodeArgs, reply)
func (mm *ETMaster) sendAggData(structId string, data []string) error {
    dataLog := fmt.Sprintf("structId[%s], [%d] messages in total.", structId, len(data))
    //log.Printf("[AggData-Publishing][sendAggData]1. start processing. %s", dataLog)

    var nodeConn *node_manager.NodeConnection
    var err error
    retry := 0

    for retry < 3 {
        startTime := time.Now()
        nodeConn, err = mm.nodeManager.GetNodeConnection()
        duration := time.Since(startTime) // time spent obtaining the connection
        log.Printf("[AggData-Publishing][sendAggData]2. obtaining a NodeConnection took: %v", duration)

        if err != nil {
            return errors.New("unregistered node")
        } else {
            *reply = true
            log.Printf("received heartbeat from unregistered node[%v]; master has re-registered it.", nodeArgs)
            return nil
            log.Printf("[AggData-Publishing][sendAggData]2.1 failed to obtain a NodeConnection, error: %v", err)
            //m.kafkaDS.StopConsumers() // TODO pause Kafka consumption
            //log.Println("============ Kafka consumption paused...")
            retry++
            time.Sleep(time.Duration(2<<retry) * time.Second) // exponential backoff
            continue
        }

        // TODO connection obtained, resume Kafka consumption and exit the loop
        //m.kafkaDS.ResumeConsumers()
        //log.Printf("[sendAggData] obtained a NodeConnection: %+v", nodeConn)

        break
    }

    log.Printf("received heartbeat from node[%v]", nodeArgs)
    *reply = true
    if err != nil || nodeConn == nil {
        log.Printf("[AggData-Publishing][sendAggData]2.2 max retries reached, no healthy node connection available, error: %v", err)
        return err
    }

    return nil
}
    // record the time spent calling Node.ProcessData
    //defer LogProcessDataTimeCost(nodeConn.NArgs.Addr, "[]aggData", time.Now())
    // RPC call to Node.ProcessData, passing []*pb.AggData
    resultChan := make(chan error, 1)
    log.Printf("[AggData-Publishing][sendAggData]3. starting RPC call [Node.HandleAggData] %s", dataLog)
    callStartTime := time.Now()
    callErr := nodeConn.CallHandleAggData(structId, data)
    log.Printf("[AggData-Publishing][sendAggData]4. RPC call elapsed: %v, %s", time.Since(callStartTime), dataLog)
    resultChan <- callErr

// NodeUnRegister unregisters a node over RPC
func (the *EtMaster) NodeUnRegister(nodeArgs *common_models.NodeArgs, reply *bool) error {
    value, ok := the.nodeMap.Load(nodeArgs.Addr)
    node := value.(*NodeRpc)
    if ok && node.client != nil {
        err := node.client.Close()
        if err != nil {
            log.Printf("node[%s] client close error %s", nodeArgs.Addr, err.Error())
    select {
    case callErr := <-resultChan:
        if callErr != nil {
            log.Printf("[AggData-Publishing][sendAggData]4. RPC call finished with error: %+v, %s", callErr, dataLog)
            return callErr
        }
        the.nodeMap.Delete(nodeArgs.Addr)
        //log.Printf("[sendAggData]4. RPC call succeeded")
    case <-time.After(5 * time.Minute): // set a timeout
        log.Printf("[AggData-Publishing][sendAggData] request exceeded 5 minutes. %s", dataLog)
        return errors.New("request timed out after 5m")
    }

    log.Printf("node service [%v] unregistered successfully", nodeArgs)
    *reply = true
    return nil
}

func (the *EtMaster) WaitNodeRegister() {
    log.Println("waiting for nodes to register")
// monitorSendStatus monitors the send status
func (mm *ETMaster) monitorSendStatus(status *SendStatus, statusName string) {
    for {
        if the.nodeMapCount() > 0 {
            break
        inProgressCount := atomic.LoadInt32(&status.inProgressCount)
        if inProgressCount > status.limitThreshold {
            atomic.StoreInt32(&status.receiving, 0)
            log.Printf("[%s] in-flight message count exceeds the threshold; pausing intake of new messages. %+v\n", statusName, status)
        } else {
            atomic.StoreInt32(&status.receiving, 1)
        }
        time.Sleep(time.Second * 10)
        time.Sleep(500 * time.Millisecond)
    }
}

func (the *EtMaster) ConnectNode() {
    the.nodeMap.Range(func(key, value interface{}) bool {
        node := value.(*NodeRpc)
        nodeAddr := key.(string)
// MonitorShutdown watches for shutdown signals
func (mm *ETMaster) MonitorShutdown() {
    sigChan := make(chan os.Signal, 1)
    signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

        if node.client == nil {
            client, err := rpc.Dial("tcp", nodeAddr)
            if err != nil {
                log.Printf("failed to connect to node -> node[%v]", nodeAddr)
                return true
            }
    sig := <-sigChan
    log.Printf("************ received signal: %s, shutting down the server...", sig)

            node.client = client
            the.nodeMap.Store(nodeAddr, node)
        }
    mm.closeDataHandlers(&mm.rawDataHandlers, "DLP_DATA_RAW")
    mm.closeDataHandlers(&mm.aggDataHandlers, "DLP_DATA_AGG")

        return true
    })
    mm.errMessagesKafkaProducer.Close()
    mm.grpcServer.GracefulStop()
    log.Println("************ server shut down successfully")
}

func (the *EtMaster) addOrUpdate(key string, newNode *NodeRpc) {
    if val, ok := the.nodeMap.Load(key); ok {
        hisNode := val.(*NodeRpc)
        hisNode.client = newNode.client
        the.nodeMap.Store(key, hisNode)
    } else {
        the.nodeMap.Store(key, newNode)
    }
}
func (the *EtMaster) nodeMapCount() int {
    count := 0
    the.nodeMap.Range(func(key, value interface{}) bool {
        count++
        return true
    })
    return count
}
func (the *EtMaster) clientIsValid(address string) bool {
    val, ok := the.nodeMap.Load(address)
    if !ok {
        return false
    }
// closeDataHandlers shuts down the data handlers
func (mm *ETMaster) closeDataHandlers(handlers *sync.Map, dlpTopic string) {
    handlers.Range(func(key, value any) bool {
        handler := value.(data_source.IMessageHandler)
        ch := handler.GetDataChannel()
        close(ch)

    if val.(*NodeRpc).client == nil {
        return false
    }
    return true
}

// getNodeWithMinThings returns the node with the fewest things
func (the *EtMaster) getNodeWithMinThings() *NodeRpc {
    var minNode *NodeRpc
    minThings := math.MaxInt64 // initialized to the maximum value

    the.nodeMap.Range(func(key, value interface{}) bool {
        node := value.(*NodeRpc)
        if len(node.args.ThingIds) < minThings {
            minThings = len(node.args.ThingIds)
            minNode = node
        for data := range ch {
            mm.errMessagesKafkaProducer.SendStringArrayMessage(dlpTopic, data.Id, data.Messages)
        }

        return true
    })

    return minNode
}
func (the *EtMaster) printNodes() {
    count := 0
    info := ""
    the.nodeMap.Range(func(key, value interface{}) bool {
        count++
        node := value.(*NodeRpc)
        info += fmt.Sprintf("%s,%s\n", node.args.ID, node.args.Addr)
        return true
    })
    countInfo := fmt.Sprintf("[%d] nodes in total:\n ", count)
    log.Printf("%s %s\n", countInfo, info)
}
func (the *EtMaster) errorHandle(errCode int, address string, dataDesc string) {
    val, ok := the.nodeMap.Load(address)
    if !ok {
        log.Printf("[tidyNodes] Error: node[%s] does not exist\n", address)
        return
    }
    node := val.(*NodeRpc)

    // send the stop signal
    the.sleepCH <- true
    log.Println("=============================================")

    // 100 fault: internal program error
    // 200 fault: network communication problem
    // 300 fault: processing timeout
    if errCode == 200 {
        log.Printf("node[%v] connection lost; sleeping 5 seconds, then removing the node. Message: %s", node.args.Addr, dataDesc)
        time.Sleep(time.Second * 5)
        the.nodeMap.Delete(address)
    } else if errCode == 300 {
        log.Printf("node[%s] processing timed out; sleeping 5 seconds, then removing the node. Message: %s", address, dataDesc)
        time.Sleep(time.Second * 5)
        the.nodeMap.Delete(address)
    }

    the.printNodes()
}

func contains(arr []string, target string) bool {
    for _, value := range arr {
        if value == target {
            return true
        }
    }
    return false
}
func timeCost(nodeId, deviceId string, start time.Time) {
    tc := time.Since(start)
    log.Printf("master call to node[%s], processing [%s] took %v", nodeId, deviceId, tc)
}
func boolToInt(b bool) int {
    if b {
        return 1
    }
    return 0
}
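The SendStatus fields in this diff implement a simple in-flight counter gate: a monitor goroutine flips `receiving` off when `inProgressCount` crosses the threshold, and publishers spin-wait while it is off. A minimal standalone sketch of that pattern, with illustrative names and numbers:

package main

import (
    "fmt"
    "sync"
    "sync/atomic"
    "time"
)

// gate pauses intake when too many sends are in flight.
type gate struct {
    inFlight  int32
    threshold int32
    receiving int32 // 1 = accept, 0 = paused
}

// monitor periodically compares the in-flight count with the threshold
// and toggles intake accordingly, mirroring monitorSendStatus above.
func (g *gate) monitor() {
    for {
        if atomic.LoadInt32(&g.inFlight) > g.threshold {
            atomic.StoreInt32(&g.receiving, 0) // pause intake
        } else {
            atomic.StoreInt32(&g.receiving, 1) // resume intake
        }
        time.Sleep(100 * time.Millisecond)
    }
}

func main() {
    g := &gate{threshold: 4, receiving: 1}
    go g.monitor()

    var wg sync.WaitGroup
    for i := 0; i < 20; i++ {
        for atomic.LoadInt32(&g.receiving) == 0 {
            time.Sleep(10 * time.Millisecond) // spin-wait while paused
        }
        atomic.AddInt32(&g.inFlight, 1)
        wg.Add(1)
        go func(n int) {
            defer wg.Done()
            defer atomic.AddInt32(&g.inFlight, -1) // decrement when done
            time.Sleep(50 * time.Millisecond)      // simulated send
            fmt.Println("sent", n)
        }(i)
    }
    wg.Wait()
}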
@@ -0,0 +1,137 @@
package app

import (
    "context"
    "et_rpc"
    "et_rpc/pb"
    "fmt"
    "log"
    "master/node_manager"
)

// MasterRPCService implements the gRPC service interface
type MasterRPCService struct {
    pb.UnimplementedMasterServiceServer
    nodeManager *node_manager.NodeManager // stores node information
}

func NewMasterRPCService(nodeManager *node_manager.NodeManager) *MasterRPCService {
    return &MasterRPCService{
        nodeManager: nodeManager,
    }
}

// RegisterNode implements the RegisterNode method
func (s *MasterRPCService) RegisterNode(ctx context.Context, req *pb.NodeRequest) (*pb.RpcResponse, error) {
    // build the response object
    response := &pb.RpcResponse{
        Status:       pb.RpcResponse_SUCCESS,
        ErrorMessage: "",
    }

    // validate the request
    if req == nil {
        return s.createErrorResponse(pb.RpcResponse_INVALID_ARGUMENT, "node registration failed: req is nil")
    }

    // add the node
    nodeArgs := &et_rpc.NodeArgs{
        ID:           req.Id,
        Addr:         req.Address,
        Load:         0,
        ResourceJson: "",
        Weight:       0,
        Status:       0,
        ErrCode:      0,
        ErrMessage:   "",
    }

    if err := s.nodeManager.AddNode(nodeArgs); err != nil {
        msg := fmt.Sprintf("node [%s] registration failed: %s", nodeArgs.Addr, err.Error())
        return s.createErrorResponse(pb.RpcResponse_NOT_FOUND, msg)
    }

    response.Status = pb.RpcResponse_SUCCESS
    response.ErrorMessage = fmt.Sprintf("node [%s] registered successfully!", req.Address)
    log.Print(response.ErrorMessage)
    return response, nil
}

// HeartbeatNode implements the HeartbeatNode method
func (s *MasterRPCService) HeartbeatNode(ctx context.Context, req *pb.NodeRequest) (*pb.RpcResponse, error) {
    // build the response object
    response := &pb.RpcResponse{
        Status:       pb.RpcResponse_SUCCESS,
        ErrorMessage: "",
    }

    // validate the request
    if req == nil {
        return s.createErrorResponse(pb.RpcResponse_INVALID_ARGUMENT, "invalid request: req is nil")
    }

    // try to update the node state
    if !s.nodeManager.NodeExists(req.Address) {
        msg := fmt.Sprintf("unregistered node: %s", req.Address)
        return s.createErrorResponse(pb.RpcResponse_NOT_FOUND, msg)
    }

    log.Printf("received heartbeat from Node[%s]", req.Address)
    response.Status = pb.RpcResponse_SUCCESS
    return response, nil
}

// UnregisterNode implements the UnregisterNode method
func (s *MasterRPCService) UnregisterNode(ctx context.Context, req *pb.NodeRequest) (*pb.RpcResponse, error) {
    // build the response object
    response := &pb.RpcResponse{
        Status:       pb.RpcResponse_SUCCESS,
        ErrorMessage: "",
    }

    // validate the request
    if req == nil {
        return s.createErrorResponse(pb.RpcResponse_INVALID_ARGUMENT, "invalid request: req is nil")
    }

    // try to remove the node
    if !s.nodeManager.RemoveNode(req.Address) {
        log.Printf("removing Node[%s]: node does not exist", req.Address)
        //return s.createErrorResponse(pb.RpcResponse_NOT_FOUND, msg)
    }

    log.Printf("Node[%s] removed", req.Address)
    response.Status = pb.RpcResponse_SUCCESS
    return response, nil
}

// CheckMasterStatus implements the CheckMasterStatus method
func (s *MasterRPCService) CheckMasterStatus(ctx context.Context, req *pb.NodeRequest) (*pb.RpcResponse, error) {
    // build the response object
    response := &pb.RpcResponse{
        Status:       pb.RpcResponse_SUCCESS,
        ErrorMessage: "",
    }

    // validate the request
    if req == nil {
        return s.createErrorResponse(pb.RpcResponse_INVALID_ARGUMENT, "invalid request: req is nil")
    }

    // log the master status check
    log.Printf("master status checked by node: ID=%s", req.Address)
    return response, nil
}

// mustEmbedUnimplementedMasterServiceServer is a placeholder method
func (s *MasterRPCService) mustEmbedUnimplementedMasterServiceServer() {}

// createErrorResponse builds an error response
func (s *MasterRPCService) createErrorResponse(status pb.RpcResponse_Status, message string) (*pb.RpcResponse, error) {
    response := &pb.RpcResponse{
        Status:       status,
        ErrorMessage: message,
    }
    log.Print(message) // log the error
    return response, fmt.Errorf("%s", message)
}
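From the node's side, registration and heartbeats against this service reduce to calls on the generated MasterServiceClient. The sketch below is illustrative: the master address, node ID and address, and heartbeat interval are assumptions, not values taken from this repository.

package main

import (
    "context"
    "log"
    "time"

    "et_rpc/pb"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

func main() {
    // Dial the master; the address is a placeholder.
    conn, err := grpc.NewClient("master:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatalf("dial master: %v", err)
    }
    defer conn.Close()
    client := pb.NewMasterServiceClient(conn)

    req := &pb.NodeRequest{Id: "node-1", Address: "10.0.0.5:40000"} // placeholders

    // Register once at startup.
    if resp, err := client.RegisterNode(context.Background(), req); err != nil || resp.Status != pb.RpcResponse_SUCCESS {
        log.Fatalf("register failed: resp=%+v err=%v", resp, err)
    }

    // Then heartbeat periodically.
    for range time.Tick(10 * time.Second) {
        if _, err := client.HeartbeatNode(context.Background(), req); err != nil {
            log.Printf("heartbeat failed: %v", err)
        }
    }
}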
@@ -0,0 +1,36 @@
package data_source

import (
    "log"
    "time"
)

type AggDataHandler struct {
    key         string
    topic       string
    partitionID int
    dataChannel chan *RPCPayload // used to emit the packaged data
}

func NewAggDataHandler(key, topic string, partitionID int) *AggDataHandler {
    handler := &AggDataHandler{
        key:         key,
        topic:       topic,
        partitionID: partitionID,
        dataChannel: make(chan *RPCPayload, 10),
    }

    return handler
}

func (h *AggDataHandler) HandleMessage(structId string, values []string) bool {
    h.dataChannel <- &RPCPayload{Id: structId, Messages: values}
    log.Printf("****** AggDataHandler.HandleMessage(), h.dataChannel[%p] queue length: %d/%d", h.dataChannel, len(h.dataChannel), cap(h.dataChannel))
    time.Sleep(50 * time.Millisecond)
    return true
}

// GetDataChannel returns the dataChannel
func (h *AggDataHandler) GetDataChannel() chan *RPCPayload {
    return h.dataChannel
}
@ -0,0 +1,49 @@
package data_source

import (
    "log"
    "time"
)

// IMessageHandler is the interface implemented by Kafka message handlers.
type IMessageHandler interface {
    HandleMessage(key string, values []string) bool
    GetDataChannel() chan *RPCPayload
}

type RPCPayload struct {
    Id       string
    Messages []string
}

type RawDataHandler struct {
    key         string
    topic       string
    partitionID int
    dataChannel chan *RPCPayload
}

// NewRawDataHandler creates a new RawDataHandler instance.
func NewRawDataHandler(key, topic string, partitionID int) *RawDataHandler {
    handler := &RawDataHandler{
        key:         key,
        topic:       topic,
        partitionID: partitionID,
        dataChannel: make(chan *RPCPayload, 10),
    }

    return handler
}

// HandleMessage is invoked from Producer() in kafka_dataSource.go.
func (h *RawDataHandler) HandleMessage(thingId string, values []string) bool {
    h.dataChannel <- &RPCPayload{Id: thingId, Messages: values}
    log.Printf("--> RawDataHandler%d ,h.dataChannel【%p】通道数据量: %d/%d", h.partitionID, h.dataChannel, len(h.dataChannel), cap(h.dataChannel))
    time.Sleep(50 * time.Millisecond)
    return true
}

// GetDataChannel returns the handler's dataChannel.
func (h *RawDataHandler) GetDataChannel() chan *RPCPayload {
    return h.dataChannel
}
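Both concrete handlers satisfy IMessageHandler, so downstream code can drain either channel the same way. A minimal consumption sketch in package data_source (forwardPayloads and the send callback are hypothetical names, not part of this commit):

// Sketch: drain any IMessageHandler's channel until it is closed.
func forwardPayloads(h IMessageHandler, send func(id string, msgs []string) error) {
    for p := range h.GetDataChannel() {
        if err := send(p.Id, p.Messages); err != nil {
            log.Printf("forward %s failed: %v", p.Id, err)
        }
    }
}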
@ -0,0 +1,317 @@
package data_source

import (
    "context"
    "encoding/json"
    "fmt"
    "gitea.anxinyun.cn/container/common_utils/configLoad"
    "github.com/IBM/sarama"
    "github.com/panjf2000/ants/v2"
    "golang.org/x/time/rate"
    "log"
    "sync"
    "time"
)

type AggConsumerGroupHandler struct {
    kafkaConfig   KafkaConfig
    topicHandlers sync.Map // per-topic message handler functions

    kafkaPaused bool        // whether data intake is currently paused
    ControlChan chan string // control-signal channel

    mu sync.RWMutex
}

func NewAggConsumerGroupHandler(kafkaConfig KafkaConfig) *AggConsumerGroupHandler {
    return &AggConsumerGroupHandler{
        kafkaConfig: kafkaConfig,
        ControlChan: make(chan string),
    }
}

func (h *AggConsumerGroupHandler) ConnectConsumerGroup() {
    log.Println("AggData kafka init...")
    vp := configLoad.LoadConfig()
    minFetch := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.minFetch")
    maxFetch := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.maxFetch")
    maxWaitTime := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.maxWaitTime")

    // Consumer configuration.
    config := sarama.NewConfig()
    config.Consumer.Return.Errors = false // do not surface per-message consume errors
    config.Version = sarama.V2_0_0_0
    config.Consumer.Offsets.Initial = sarama.OffsetOldest // start from the oldest message
    config.Consumer.Offsets.AutoCommit.Enable = true      // auto-commit offsets
    config.Consumer.Offsets.AutoCommit.Interval = 1000 * time.Millisecond
    config.Consumer.Fetch.Min = minFetch // minimum fetch size (e.g. 10 KB)
    config.Consumer.Fetch.Max = maxFetch // maximum fetch size (e.g. 5 MB)
    config.Consumer.MaxWaitTime = time.Duration(maxWaitTime) * time.Millisecond // maximum broker wait time in ms
    config.Consumer.Retry.Backoff = 10000 * time.Millisecond                    // delay before retrying after a consume failure
    //config.Consumer.Retry.BackoffFunc = func(retries int) time.Duration {}
    config.Consumer.Group.Session.Timeout = 60000 * time.Millisecond   // session timeout (default 10s, raised here)
    config.Consumer.Group.Heartbeat.Interval = 6000 * time.Millisecond // must be below the session timeout, usually under a third of it (default 3s)
    config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRoundRobin()} // round-robin partition assignment

    // Create the consumer group.
    client, err := sarama.NewConsumerGroup(h.kafkaConfig.Brokers, h.kafkaConfig.GroupID, config)
    if err != nil {
        panic(err)
    }
    defer func() {
        _ = client.Close()
    }()

    // Goroutine that drains consumer errors.
    go func() {
        for err := range client.Errors() {
            log.Println("消费错误:", err)
        }
    }()

    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    // Listen for control signals.
    go func() {
        for {
            select {
            case signal := <-h.ControlChan:
                switch signal {
                case "stop":
                    log.Printf("[Agg-ConsumerGroup-%d] 收到停止信号,将停止消费.", h.kafkaConfig.ClientID)
                    h.kafkaPaused = true
                case "resume":
                    log.Printf("[Agg-ConsumerGroup-%d] 收到恢复信号,将恢复消费.", h.kafkaConfig.ClientID)
                    h.kafkaPaused = false
                }
            }
        }
    }()

    log.Printf("[Agg-ConsumerGroup-%d] 准备启动 Kafka 消费者协程。订阅的主题: %v", h.kafkaConfig.ClientID, h.kafkaConfig.Topic)

    // The handler itself implements sarama.ConsumerGroupHandler.
    consumerInstance := h

    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        defer wg.Done()
        for {
            topics := []string{h.kafkaConfig.Topic}
            err1 := client.Consume(ctx, topics, consumerInstance)
            if err1 != nil {
                log.Printf("[Agg-ConsumerGroup-%d] 订阅主题[%v]异常。%s", h.kafkaConfig.ClientID, h.kafkaConfig.Topic, err1.Error())
                return
            }

            if ctx.Err() != nil {
                log.Println(ctx.Err())
                return
            }
        }
    }()

    log.Println("AggData Sarama consumer up and running ...")
    wg.Wait()
}

func (h *AggConsumerGroupHandler) Setup(session sarama.ConsumerGroupSession) error {
    // Perform any setup needed at the start of a session.
    log.Printf("data_agg消费者组会话开始,%+v", session.Claims())
    return nil
}

func (h *AggConsumerGroupHandler) Cleanup(session sarama.ConsumerGroupSession) error {
    // Perform any cleanup needed at the end of a session.
    log.Println("data_agg消费者组会话结束,", session.Claims())
    return nil
}

func (h *AggConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
    log.Printf("data_agg 处理消费者组会话,%+v. MemberID: %v, Topic: %v, Partition: %v \n", session.Claims(), session.MemberID(), claim.Topic(), claim.Partition())
    topic := claim.Topic()
    isDeadLetterQueue := false // whether this claim carries dead-letter-queue messages
    if len(topic) > 4 && topic[:4] == "DLP_" {
        isDeadLetterQueue = true
    }

    if isDeadLetterQueue {
        return h.DLPConsumeClaim(session, claim)
    }
    return h.BatchConsumeClaim(session, claim)
}

func (h *AggConsumerGroupHandler) BatchConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
    maxBatchSize := configLoad.LoadConfig().GetInt("performance.master.kafkaConsumer.data_agg.maxBatchSize")
    messageChannel := make(chan map[string][]*sarama.ConsumerMessage, 50)
    topicHandlerKey := fmt.Sprintf("%s_%d", claim.Topic(), claim.Partition())
    msgHandler, isValid := h.topicHandlers.Load(topicHandlerKey)
    if !isValid {
        log.Printf("[Agg-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
        return fmt.Errorf("[Agg-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
    }

    // Reuse map objects via sync.Pool.
    messageMapPool := sync.Pool{
        New: func() interface{} {
            return make(map[string][]*sarama.ConsumerMessage)
        },
    }

    // Worker goroutine that processes batched messages.
    var wg sync.WaitGroup
    pool, _ := ants.NewPool(100)
    defer pool.Release()

    // Rate limiter: at most one batch every 10 ms.
    var aggRateLimiter = rate.NewLimiter(rate.Every(10*time.Millisecond), 1)
    go func() {
        for structMessages := range messageChannel {
            _ = aggRateLimiter.Wait(context.Background()) // throttle consumption

            wg.Add(len(structMessages))

            for k, v := range structMessages {
                key := k
                msgs := v

                _ = pool.Submit(func() {
                    defer wg.Done()
                    defer messageMapPool.Put(structMessages) // NOTE: returned once per key, and not cleared first

                    if len(msgs) == 0 {
                        return
                    }

                    values := make([]string, len(msgs))
                    for i, msg := range msgs {
                        values[i] = string(msg.Value)
                    }

                    // Run the handler and check the result.
                    isProcessed := msgHandler.(func(structId string, values []string) bool)(key, values)
                    if !isProcessed {
                        log.Printf("[Agg-ConsumerGroup] 消息处理失败,键: %s,消息: %v", key, msgs)
                    } else {
                        // Mark the batch as handled on success.
                        for _, msg := range msgs {
                            session.MarkMessage(msg, "is handled")
                        }
                    }
                })
            }
        }
    }()

    batchBuffer := make(map[string][]*sarama.ConsumerMessage) // per-key batch buffer
    currentBatchSize := make(map[string]int)                  // current batch size per key

    // Ticker goroutine that flushes leftover messages.
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    ticker := time.NewTicker(20 * time.Second)
    defer ticker.Stop()
    go func() {
        for {
            select {
            case <-ticker.C:
                // Flush whatever has accumulated.
                for structId, msgs := range batchBuffer {
                    if len(msgs) > 0 {
                        // Take a map from the pool.
                        msgMap := messageMapPool.Get().(map[string][]*sarama.ConsumerMessage)
                        msgMap[structId] = msgs
                        messageChannel <- msgMap
                        delete(batchBuffer, structId) // drop the flushed key
                        delete(currentBatchSize, structId)
                    }
                }
            case <-ctx.Done():
                return // exit the goroutine
            }
        }
    }()

    // Read messages.
    for msg := range claim.Messages() {
        structId := string(msg.Key)
        if structId == "" {
            structId = "structId-null"
        }

        // Append the message to the batch buffer.
        batchBuffer[structId] = append(batchBuffer[structId], msg)

        // Track the accumulated size for this key.
        currentBatchSize[structId] += len(msg.Value)

        // Once the batch reaches maxBatchSize, ship it and reset.
        if currentBatchSize[structId] >= maxBatchSize {
            // Take a map from the pool.
            msgMap := messageMapPool.Get().(map[string][]*sarama.ConsumerMessage)
            msgMap[structId] = batchBuffer[structId]
            messageChannel <- msgMap
            delete(batchBuffer, structId)      // drop the flushed key
            delete(currentBatchSize, structId) // drop the flushed size
        }
    }

    close(messageChannel)
    wg.Wait()

    return nil
}

func (h *AggConsumerGroupHandler) DLPConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
    topicHandlerKey := "DLP_DATA_AGG"
    msgHandler, isValid := h.topicHandlers.Load(topicHandlerKey)
    if !isValid {
        log.Printf("[DLP-Agg-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
        return fmt.Errorf("[DLP-Agg-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
    }

    for msg := range claim.Messages() {
        structId := string(msg.Key)
        if structId == "" {
            structId = "structId-null"
        }

        // Decode the JSON-encoded value.
        var value []string
        err := json.Unmarshal(msg.Value, &value)
        if err != nil {
            log.Printf("[DLP_Agg-ConsumerGroup]Failed to unmarshal value: %v", err)
            continue
        }

        isProcessed := msgHandler.(func(structId string, values []string) bool)(structId, value)
        if !isProcessed {
            log.Printf("[DLP_Agg-ConsumerGroup]消息处理失败,structId: %s,消息: %v", structId, value)
        } else {
            // Mark the message as handled on success.
            session.MarkMessage(msg, "is handled")
        }
    }

    return nil
}

func (h *AggConsumerGroupHandler) SetTopicHandler(topicHandlerKey string, fun func(structId string, values []string) bool) {
    h.mu.Lock()
    defer h.mu.Unlock()
    h.topicHandlers.Store(topicHandlerKey, fun)
}

// SetKafkaPaused toggles message consumption.
func (h *AggConsumerGroupHandler) SetKafkaPaused(paused bool) {
    h.kafkaPaused = paused
    if paused {
        log.Println("Kafka消息消费 已暂停.")
    } else {
        log.Println("Kafka消息消费 已恢复.")
    }
}
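The handler only consumes partitions for which SetTopicHandler registered a function under the "topic_partition" key. A wiring sketch in package data_source; broker, group, and topic values are assumptions:

// Sketch: register one AggDataHandler per partition, then join the group.
func startAggConsumerExample() {
    cfg := KafkaConfig{
        Brokers:    []string{"localhost:9092"}, // assumed broker
        GroupID:    "et-master",                // assumed group id
        Topic:      "anxinyun_agg",             // assumed topic
        Partitions: 2,
    }
    h := NewAggConsumerGroupHandler(cfg)
    for p := 0; p < cfg.Partitions; p++ {
        key := fmt.Sprintf("%s_%d", cfg.Topic, p)
        agg := NewAggDataHandler(key, cfg.Topic, p)
        h.SetTopicHandler(key, agg.HandleMessage)
    }
    h.ConnectConsumerGroup() // blocks while the group session is active
}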
@ -0,0 +1,329 @@
package data_source

import (
    "context"
    "encoding/json"
    "fmt"
    "gitea.anxinyun.cn/container/common_utils/configLoad"
    "github.com/IBM/sarama"
    "github.com/panjf2000/ants/v2"
    "golang.org/x/time/rate"
    "log"
    "sync"
    "time"
)

type RawConsumerGroupHandler struct {
    kafkaConfig   KafkaConfig
    topicHandlers sync.Map // per-partition message handler functions

    kafkaPaused bool        // whether data intake is currently paused
    ControlChan chan string // control-signal channel

    mu sync.RWMutex
}

func NewRawConsumerGroupHandler(kafkaConfig KafkaConfig) *RawConsumerGroupHandler {
    return &RawConsumerGroupHandler{
        kafkaConfig: kafkaConfig,
        ControlChan: make(chan string),
    }
}

func (h *RawConsumerGroupHandler) ConnectConsumerGroup() {
    log.Println("RawData kafka init...")
    vp := configLoad.LoadConfig()
    minFetch := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.minFetch")
    maxFetch := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.maxFetch")
    maxWaitTime := vp.GetInt32("performance.master.kafkaConsumer.consumerCfg.maxWaitTime")

    // Consumer configuration.
    config := sarama.NewConfig()
    config.Consumer.Return.Errors = false // do not surface per-message consume errors
    config.Version = sarama.V2_0_0_0
    config.Consumer.Offsets.Initial = sarama.OffsetOldest // start from the oldest message
    config.Consumer.Offsets.AutoCommit.Enable = true      // auto-commit offsets
    config.Consumer.Offsets.AutoCommit.Interval = 1000 * time.Millisecond
    config.Consumer.Fetch.Min = minFetch // minimum fetch size (e.g. 10 KB)
    config.Consumer.Fetch.Max = maxFetch // maximum fetch size (e.g. 5 MB)
    config.Consumer.MaxWaitTime = time.Duration(maxWaitTime) * time.Millisecond // maximum broker wait time in ms
    config.Consumer.Retry.Backoff = 10000 * time.Millisecond                    // delay before retrying after a consume failure
    //config.Consumer.Retry.BackoffFunc = func(retries int) time.Duration {}
    config.Consumer.Group.Session.Timeout = 60000 * time.Millisecond   // session timeout (default 10s, raised here)
    config.Consumer.Group.Heartbeat.Interval = 6000 * time.Millisecond // must be below the session timeout, usually under a third of it (default 3s)
    config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRoundRobin()} // round-robin partition assignment

    // Create the consumer group.
    client, err := sarama.NewConsumerGroup(h.kafkaConfig.Brokers, h.kafkaConfig.GroupID, config)
    if err != nil {
        panic(err)
    }
    defer func() {
        _ = client.Close()
    }()

    // Goroutine that drains consumer errors.
    go func() {
        for err := range client.Errors() {
            log.Println("消费错误:", err)
        }
    }()

    // Listen for control signals.
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    go func() {
        for {
            select {
            case signal := <-h.ControlChan:
                switch signal {
                case "stop":
                    log.Printf("[Raw-ConsumerGroup-%d] 收到停止信号,将停止消费.", h.kafkaConfig.ClientID)
                    h.kafkaPaused = true
                case "resume":
                    log.Printf("[Raw-ConsumerGroup-%d] 收到恢复信号,将恢复消费.", h.kafkaConfig.ClientID)
                    h.kafkaPaused = false
                }
            case <-ctx.Done():
                return
            }
        }
    }()

    log.Printf("[Raw-ConsumerGroup-%d] 准备启动 Kafka 消费者协程。订阅的主题: %v", h.kafkaConfig.ClientID, h.kafkaConfig.Topic)

    // The handler itself implements sarama.ConsumerGroupHandler.
    consumerInstance := h

    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        defer wg.Done()
        for {
            // Consume Kafka messages: join the consumer group and subscribe to the
            // topic; Kafka assigns partitions to each group member.
            topics := []string{h.kafkaConfig.Topic}
            err1 := client.Consume(ctx, topics, consumerInstance)
            if err1 != nil {
                log.Printf("[Raw-ConsumerGroup-%d] 订阅主题[%v]异常。%s", h.kafkaConfig.ClientID, h.kafkaConfig.Topic, err1.Error())
                return
            }

            if ctx.Err() != nil {
                log.Println(ctx.Err())
                return
            }
        }
    }()

    log.Println("RawData Sarama consumer up and running ...")
    wg.Wait()
}

// Setup runs at the start of a new session.
func (h *RawConsumerGroupHandler) Setup(session sarama.ConsumerGroupSession) error {
    // Perform any setup needed here.
    log.Printf("data_raw消费者组会话开始,%+v \n", session.Claims())
    return nil
}

// Cleanup runs at the end of a session.
func (h *RawConsumerGroupHandler) Cleanup(session sarama.ConsumerGroupSession) error {
    // Perform any cleanup needed here.
    log.Println("data_raw消费者组会话结束,", session.Claims())
    return nil
}

// ConsumeClaim starts the consumption loop for one claim.
func (h *RawConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
    log.Printf("data_raw 处理消费者组会话,%+v, MemberID: %v, Topic: %v, Partition: %v \n", session.Claims(), session.MemberID(), claim.Topic(), claim.Partition())
    topic := claim.Topic()
    isDeadLetterQueue := false // whether this claim carries dead-letter-queue messages
    if len(topic) > 4 && topic[:4] == "DLP_" {
        isDeadLetterQueue = true
    }

    if isDeadLetterQueue {
        return h.DLPConsumeClaim(session, claim)
    }
    return h.BatchConsumeClaim(session, claim)
}

func (h *RawConsumerGroupHandler) BatchConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
    //const maxBatchSize = 50 * 1024 // TODO per-batch byte ceiling, e.g. 50 KB
    maxBatchSize := configLoad.LoadConfig().GetInt("performance.master.kafkaConsumer.data_raw.maxBatchSize")
    messageChannel := make(chan map[string][]*sarama.ConsumerMessage, 100)
    topicHandlerKey := fmt.Sprintf("%s_%d", claim.Topic(), claim.Partition())
    msgHandler, isValid := h.topicHandlers.Load(topicHandlerKey)
    if !isValid {
        log.Printf("[Raw-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
        return fmt.Errorf("[Raw-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
    }

    messageMapPool := sync.Pool{
        New: func() interface{} {
            return make(map[string][]*sarama.ConsumerMessage)
        },
    }

    // Worker goroutine that processes batched messages.
    var wg sync.WaitGroup

    pool, _ := ants.NewPool(100)
    defer pool.Release()

    var rawRateLimiter = rate.NewLimiter(rate.Every(10*time.Millisecond), 1)
    go func() {
        for thingMessages := range messageChannel {
            _ = rawRateLimiter.Wait(context.Background()) // throttle consumption

            wg.Add(len(thingMessages))

            for k, v := range thingMessages {
                key := k
                msgs := v

                _ = pool.Submit(func() {
                    defer wg.Done()
                    defer messageMapPool.Put(thingMessages) // return the map to the pool (note: once per key, without clearing)

                    if len(msgs) == 0 {
                        return
                    }

                    values := make([]string, len(msgs))
                    for i, msg := range msgs {
                        values[i] = string(msg.Value)
                    }

                    // Run the handler and check the result.
                    isProcessed := msgHandler.(func(thingId string, values []string) bool)(key, values)
                    if !isProcessed {
                        log.Printf("[Raw-ConsumerGroup] 消息处理失败,键: %s,消息: %v", key, msgs)
                    } else {
                        // Mark the batch as handled on success.
                        for _, msg := range msgs {
                            session.MarkMessage(msg, "is handled")
                        }
                    }
                })
            }
        }
    }()

    batchBuffer := make(map[string][]*sarama.ConsumerMessage) // batch buffer keyed by thingId
    currentBatchSize := make(map[string]int)                  // current batch size per thingId

    // Ticker goroutine that flushes leftover messages.
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    ticker := time.NewTicker(20 * time.Second)
    defer ticker.Stop()
    go func() {
        for {
            select {
            case <-ticker.C:
                // Flush whatever has accumulated.
                for thingId, msgs := range batchBuffer {
                    if len(msgs) > 0 {
                        msgMap := messageMapPool.Get().(map[string][]*sarama.ConsumerMessage)
                        msgMap[thingId] = msgs
                        messageChannel <- msgMap
                        delete(batchBuffer, thingId)
                        delete(currentBatchSize, thingId) // keep both maps in sync
                    }
                }
            case <-ctx.Done():
                return // exit the goroutine
            }
        }
    }()

    // Read messages.
    for msg := range claim.Messages() {
        thingId := string(msg.Key)
        if thingId == "" {
            thingId = "thingId-null"
        }

        // Append the message to the batch buffer.
        batchBuffer[thingId] = append(batchBuffer[thingId], msg)

        // Track the accumulated size for this key.
        currentBatchSize[thingId] += len(msg.Value)

        // Once the batch reaches maxBatchSize, ship it and reset.
        if currentBatchSize[thingId] >= maxBatchSize {
            // Take a map from the pool.
            thingMessages := messageMapPool.Get().(map[string][]*sarama.ConsumerMessage)
            thingMessages[thingId] = batchBuffer[thingId]
            messageChannel <- thingMessages
            delete(batchBuffer, thingId)      // drop the flushed key
            delete(currentBatchSize, thingId) // drop the flushed size
        }
    }

    close(messageChannel)
    wg.Wait()

    return nil
}

func (h *RawConsumerGroupHandler) DLPConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
    topicHandlerKey := "DLP_DATA_RAW"
    msgHandler, isValid := h.topicHandlers.Load(topicHandlerKey)
    if !isValid {
        log.Printf("[DLP-Raw-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
        return fmt.Errorf("[DLP-Raw-ConsumerGroup]Topic[%s]Partitions[%d]无消息处理者。", claim.Topic(), claim.Partition())
    }

    for msg := range claim.Messages() {
        thingId := string(msg.Key)
        if thingId == "" {
            thingId = "thingId-null"
        }

        // Decode the JSON-encoded value.
        var value []string
        err := json.Unmarshal(msg.Value, &value)
        if err != nil {
            log.Printf("[DLP_Raw-ConsumerGroup]Failed to unmarshal value: %v", err)
            continue
        }

        isProcessed := msgHandler.(func(thingId string, values []string) bool)(thingId, value)
        if !isProcessed {
            log.Printf("[DLP_Raw-ConsumerGroup]消息处理失败,thingId: %s,消息: %v", thingId, value)
        } else {
            // Mark the message as handled on success.
            session.MarkMessage(msg, "is handled")
        }
    }

    return nil
}

func (h *RawConsumerGroupHandler) SetTopicHandler(topicHandlerKey string, fun func(thingId string, values []string) bool) {
    h.mu.Lock()
    defer h.mu.Unlock()
    h.topicHandlers.Store(topicHandlerKey, fun)
}

// SetKafkaPaused toggles message consumption.
func (h *RawConsumerGroupHandler) SetKafkaPaused(paused bool) {
    h.kafkaPaused = paused
    if paused {
        log.Println("Kafka消息消费 已暂停.")
    } else {
        log.Println("Kafka消息消费 已恢复.")
    }
}

//// TODO dynamically adjust the consumption rate
//func (h *RawConsumerGroupHandler) SetConsumeRate(interval time.Duration) {
//    h.mu.Lock()
//    defer h.mu.Unlock()
//    rateLimiter.SetLimit(rate.Every(interval))
//}
@ -0,0 +1,278 @@
package data_source

import (
    "fmt"
    "gitea.anxinyun.cn/container/common_utils/configLoad"
    "github.com/spf13/viper"
    "log"
    "sync"
)

type KafkaConfig struct {
    ClientID   int // currently unused
    Brokers    []string
    GroupID    string
    Topic      string
    Partitions int
}

type TopicConfig struct {
    Topic      string
    Partitions int
}

type KafkaConsumerConfig struct {
    RawData *RawDataConfig //`yaml:"data_raw"`
    AggData *AggDataConfig // `yaml:"data_agg"`
}

type RawDataConfig struct {
    MaxBatchSize   int `yaml:"maxBatchSize"`
    IotaBufSize    int `yaml:"iotaBufSize"`
    ProcessBufSize int `yaml:"processBufSize"`
}

type AggDataConfig struct {
    MaxBatchSize int `yaml:"maxBatchSize"`
    AggBufSize   int `yaml:"aggBufSize"`
}

type KafkaDataSource struct {
    groupId                     string
    Brokers                     []string
    Topics                      map[string]TopicConfig
    Master_kafkaConsumer_config *KafkaConsumerConfig // performance tuning settings
    RawDataHandlers             *sync.Map            // raw-data handlers
    AggDataHandlers             *sync.Map            // aggregated-data handlers

    kafkaPaused bool
    controlChan chan string // control-signal channel
}

func NewKafkaDataSource() *KafkaDataSource {
    k := &KafkaDataSource{
        controlChan: make(chan string),
    }

    k.loadKafkaConfig()
    return k
}

func (s *KafkaDataSource) loadKafkaConfig() {
    vp := configLoad.LoadConfig()
    groupId := vp.GetString("kafka.groupId")
    brokers := vp.GetStringSlice("kafka.Brokers")
    log.Println("消费者组 kafka.groupId:", groupId)

    s.groupId = groupId
    s.Brokers = brokers
    s.loadTopics(vp)
    s.loadKafkaConsumerConfig(vp)
}

func (s *KafkaDataSource) loadTopics(vp *viper.Viper) {
    topics := make(map[string]TopicConfig)

    // Topics to load.
    topicNames := []string{"data_raw", "data_agg"}

    for _, topicName := range topicNames {
        topic := vp.GetString(fmt.Sprintf("kafka.Topics.%s.topic", topicName))
        if topic == "" {
            log.Printf("主题 kafka.Topics.%s.topic 配置为空", topicName)
            continue
        }

        partitions := vp.GetInt(fmt.Sprintf("kafka.Topics.%s.partitions", topicName))
        if partitions <= 0 {
            partitions = 1
        }

        topics[topicName] = TopicConfig{
            Topic:      topic,
            Partitions: partitions,
        }
    }

    s.Topics = topics
}

func (s *KafkaDataSource) loadKafkaConsumerConfig(vp *viper.Viper) {
    // Read the kafkaConsumer section.
    kafkaConsumerKey := "performance.master.kafkaConsumer"
    if !vp.IsSet(kafkaConsumerKey) {
        log.Panicf("配置 %s 必须存在", kafkaConsumerKey)
    }

    // Build the KafkaConsumerConfig instance.
    config := &KafkaConsumerConfig{}

    // Parse the data_raw section.
    if vp.IsSet(kafkaConsumerKey + ".data_raw") {
        dataRaw := &RawDataConfig{}
        if err := vp.UnmarshalKey(kafkaConsumerKey+".data_raw", dataRaw); err != nil {
            log.Panicf("解析 data_raw 配置失败: %v\n", err)
        } else {
            config.RawData = dataRaw
        }
    }

    // Parse the data_agg section.
    if vp.IsSet(kafkaConsumerKey + ".data_agg") {
        dataAgg := &AggDataConfig{}
        if err := vp.UnmarshalKey(kafkaConsumerKey+".data_agg", dataAgg); err != nil {
            log.Panicf("解析 data_agg 配置失败: %v\n", err)
        } else {
            config.AggData = dataAgg
        }
    }

    s.Master_kafkaConsumer_config = config
}

func (s *KafkaDataSource) AggDataProducer() {
    var wg sync.WaitGroup
    const topicCfgKey = "data_agg"
    topicCfg := s.Topics[topicCfgKey]

    if topicCfg.Topic == "" {
        log.Printf("Error: 启动 AggData Producer 失败,无 kafka.topics.data_agg 配置。")
        return
    }

    if s.Master_kafkaConsumer_config.AggData == nil {
        log.Printf("Error: 启动 AggData Producer 失败,无 performance.master.kafkaConsumer.data_agg 配置。")
        return
    }

    // Start the worker goroutine.
    wg.Add(1)
    go func(clientID int) {
        defer wg.Done()
        kafkaHandler := NewAggConsumerGroupHandler(KafkaConfig{
            Brokers:    s.Brokers,
            GroupID:    s.groupId,
            Topic:      topicCfg.Topic,
            Partitions: topicCfg.Partitions,
            ClientID:   clientID,
        })
        kafkaHandler.ControlChan = s.controlChan

        // Wire up per-partition handlers.
        for partId := 0; partId < topicCfg.Partitions; partId++ {
            key := fmt.Sprintf("%s_%d", topicCfg.Topic, partId)
            msgHandler, ok := s.getDataHandler(topicCfgKey, key)

            if !ok || msgHandler == nil {
                // NOTE: log.Panicf aborts here, so the continue below is unreachable.
                log.Panicf("Kafka topic[%s] 未定义data_agg 消息处理者,跳过。\n", key)
                continue
            }

            // Forward messages to dataSource/kafka/AggDataHandler.HandleMessage([]string).
            kafkaHandler.SetTopicHandler(key, msgHandler.HandleMessage)
        }

        // Dead-letter message handler.
        // NOTE: this registers the raw-data DLP handler under "DLP_DATA_RAW", while
        // AggConsumerGroupHandler.DLPConsumeClaim looks up "DLP_DATA_AGG" — likely a copy-paste slip.
        dlpKey := "DLP_DATA_RAW"
        dataHandler, ok := s.RawDataHandlers.Load(dlpKey)
        if !ok {
            log.Panicf("Kafka topic[%s] 未定义消息处理者,跳过。\n", dlpKey)
        }
        msgHandler, _ := dataHandler.(IMessageHandler)
        kafkaHandler.SetTopicHandler(dlpKey, msgHandler.HandleMessage)

        // Start the consumer group.
        kafkaHandler.ConnectConsumerGroup()

    }(1)

    wg.Wait()
}

// RawDataProducer turns Kafka messages into data models and feeds the data channels.
func (s *KafkaDataSource) RawDataProducer() {
    var wg sync.WaitGroup
    const topicCfgKey = "data_raw"
    topicCfg := s.Topics[topicCfgKey]

    if topicCfg.Topic == "" {
        log.Printf("Error: 启动 RawData Producer 失败,无 kafka.topics.data_raw 配置。")
        return
    }

    if s.Master_kafkaConsumer_config.RawData == nil {
        log.Printf("Error: 启动 RawData Producer 失败,无 performance.master.kafkaConsumer.data_raw 配置。")
        return
    }

    // Start the worker goroutine.
    wg.Add(1)
    go func(clientID int) {
        defer wg.Done()
        kafkaHandler := NewRawConsumerGroupHandler(KafkaConfig{
            Brokers:    s.Brokers,
            GroupID:    s.groupId,
            Topic:      topicCfg.Topic,
            Partitions: topicCfg.Partitions,
            ClientID:   clientID,
        })
        kafkaHandler.ControlChan = s.controlChan

        for partId := 0; partId < topicCfg.Partitions; partId++ {
            key := fmt.Sprintf("%s_%d", topicCfg.Topic, partId)
            msgHandler, ok := s.getDataHandler(topicCfgKey, key)
            if !ok || msgHandler == nil {
                // NOTE: log.Panicf aborts here, so the continue below is unreachable.
                log.Panicf("Kafka topic[%s] 未定义消息处理者,跳过。\n", key)
                continue
            }
            // Forward messages to dataSource/kafka/RawDataHandler.HandleMessage([]string).
            kafkaHandler.SetTopicHandler(key, msgHandler.HandleMessage)
        }

        // Dead-letter message handler.
        dlpKey := "DLP_DATA_RAW"
        dataHandler, ok := s.RawDataHandlers.Load(dlpKey)
        if !ok {
            log.Panicf("Kafka topic[%s] 未定义消息处理者,跳过。\n", dlpKey)
        }
        msgHandler, _ := dataHandler.(IMessageHandler)
        kafkaHandler.SetTopicHandler(dlpKey, msgHandler.HandleMessage)

        // Start consuming.
        kafkaHandler.ConnectConsumerGroup()

    }(1)

    wg.Wait()
}

// getDataHandler looks up the dataHandler for a key.
func (s *KafkaDataSource) getDataHandler(topicCfg, key string) (IMessageHandler, bool) {
    var dataHandler any
    var exists bool

    if topicCfg == "data_agg" {
        dataHandler, exists = s.AggDataHandlers.Load(key)
    } else if topicCfg == "data_raw" {
        dataHandler, exists = s.RawDataHandlers.Load(key)
    }

    if !exists {
        return nil, false
    }

    handler, ok := dataHandler.(IMessageHandler)
    if !ok {
        return nil, false
    }

    return handler, true
}

// StopConsumers sends the stop signal.
func (s *KafkaDataSource) StopConsumers() {
    s.controlChan <- "stop"
}

// ResumeConsumers sends the resume signal.
func (s *KafkaDataSource) ResumeConsumers() {
    s.controlChan <- "resume"
}
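RawDataProducer expects the handler maps to be populated before it runs; NewKafkaDataSource does not initialize them. A start-up sketch in package data_source, with handler keys following the "topic_partition" convention used above:

// Sketch: populate RawDataHandlers, then run the raw-data pipeline.
func startRawPipelineExample() {
    ds := NewKafkaDataSource()
    ds.RawDataHandlers = &sync.Map{} // not initialized by NewKafkaDataSource
    topicCfg := ds.Topics["data_raw"]
    for p := 0; p < topicCfg.Partitions; p++ {
        key := fmt.Sprintf("%s_%d", topicCfg.Topic, p)
        ds.RawDataHandlers.Store(key, NewRawDataHandler(key, topicCfg.Topic, p))
    }
    // RawDataProducer also requires a handler under "DLP_DATA_RAW".
    ds.RawDataHandlers.Store("DLP_DATA_RAW", NewRawDataHandler("DLP_DATA_RAW", "DLP_"+topicCfg.Topic, 0))
    ds.RawDataProducer() // blocks; runs the consumer group
}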
@ -0,0 +1,67 @@
package data_source

import (
    "encoding/json"
    "fmt"
    "github.com/IBM/sarama"
    "log"
    "time"
)

type KafkaProducer struct {
    producer sarama.SyncProducer
    brokers  []string
}

func NewKafkaProducer(brokers []string) (*KafkaProducer, error) {
    // Producer configuration.
    producerConfig := sarama.NewConfig()
    producerConfig.Producer.Return.Successes = true                // report successfully delivered messages
    producerConfig.Producer.Return.Errors = true                   // report failed deliveries
    producerConfig.Producer.RequiredAcks = sarama.WaitForAll       // wait for all replicas to acknowledge
    producerConfig.Producer.Timeout = 10 * time.Second             // producer timeout
    producerConfig.Producer.Retry.Max = 3                          // maximum retries
    producerConfig.Producer.Retry.Backoff = 100 * time.Millisecond // delay between retries
    producerConfig.Producer.Compression = sarama.CompressionSnappy // Snappy compression
    producerConfig.Producer.MaxMessageBytes = 1024 * 1024 * 4      // up to 4 MB per message

    producer, err := sarama.NewSyncProducer(brokers, producerConfig)
    if err != nil {
        return nil, fmt.Errorf("failed to create Kafka producer:%v", err)
    }

    return &KafkaProducer{
        producer: producer,
        brokers:  brokers,
    }, nil
}

// SendStringArrayMessage sends messages to the given Kafka topic.
func (kp *KafkaProducer) SendStringArrayMessage(topic, msgKey string, values []string) error {
    // Serialize the value as JSON.
    valueBytes, err := json.Marshal(values)
    if err != nil {
        return fmt.Errorf("failed to marshal value: %v", err)
    }

    // Build the Kafka message.
    msg := &sarama.ProducerMessage{
        Topic: topic,
        Key:   sarama.StringEncoder(msgKey),
        Value: sarama.ByteEncoder(valueBytes),
    }

    // Send it.
    _, _, err = kp.producer.SendMessage(msg)
    if err != nil {
        return fmt.Errorf("failed to send message to Kafka: %v", err)
    }

    log.Printf("Message sent successfully: key=%s, len(value)=%v", msgKey, len(values))
    return nil
}

// Close shuts down the Kafka producer.
func (kp *KafkaProducer) Close() error {
    return kp.producer.Close()
}
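The JSON-encoded []string value produced here matches what DLPConsumeClaim unmarshals on the consumer side, so this producer fits the dead-letter path. A usage sketch in package data_source; the broker address, topic, and key are assumptions:

// Sketch: re-publish a failed batch to a dead-letter topic.
func produceDeadLetterExample() error {
    kp, err := NewKafkaProducer([]string{"localhost:9092"}) // assumed broker
    if err != nil {
        return err
    }
    defer kp.Close()

    // Key by struct id so the consumer can rebatch per key.
    return kp.SendStringArrayMessage("DLP_anxinyun_agg", "struct-1001", []string{`{"v":1}`})
}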
@ -0,0 +1,112 @@
package node_manager

import (
    "context"
    "et_rpc/pb"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
    "google.golang.org/grpc/health/grpc_health_v1"
    "log"
    "time"

    pool "github.com/jolestar/go-commons-pool"
)

type GRPCPoolObject struct {
    Conn   *grpc.ClientConn     // the underlying gRPC connection
    Client pb.NodeServiceClient // the gRPC client
}

type GRPCClientFactory struct {
    address string
}

// NewGRPCClientFactory creates a new gRPC connection factory.
func NewGRPCClientFactory(address string) *GRPCClientFactory {
    return &GRPCClientFactory{
        address: address,
    }
}

func (f *GRPCClientFactory) MakeObject(ctx context.Context) (*pool.PooledObject, error) {
    // NOTE: this timeout context is not used below; grpc.NewClient does not dial eagerly.
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    // Retry policy for NodeService methods.
    serviceConfig := `{
        "methodConfig": [{
            "name": [{"service": "NodeService", "method": "*"}],
            "retryPolicy": {
                "maxAttempts": 2,
                "initialBackoff": "1s",
                "maxBackoff": "10s",
                "backoffMultiplier": 2,
                "retryableStatusCodes": ["UNAVAILABLE", "DEADLINE_EXCEEDED"]
            }
        }]
    }`

    conn, err := grpc.NewClient(
        f.address,
        grpc.WithTransportCredentials(insecure.NewCredentials()),
        grpc.WithDefaultServiceConfig(serviceConfig),
    )

    if err != nil {
        return nil, err
    }

    client := pb.NewNodeServiceClient(conn)
    return pool.NewPooledObject(
        &GRPCPoolObject{
            Conn:   conn,
            Client: client,
        },
    ), nil
}

// DestroyObject tears down a pooled gRPC connection.
func (f *GRPCClientFactory) DestroyObject(ctx context.Context, object *pool.PooledObject) error {
    grpcPoolObj := object.Object.(*GRPCPoolObject)
    if grpcPoolObj.Client != nil {
        // Close the client connection.
        grpcPoolObj.Conn.Close()
    }
    return nil
}

// ValidateObject checks that a pooled gRPC connection is still usable.
func (f *GRPCClientFactory) ValidateObject(ctx context.Context, object *pool.PooledObject) bool {
    grpcPoolObj := object.Object.(*GRPCPoolObject)

    select {
    case <-ctx.Done():
        return false // the context is already cancelled
    default:
        // proceed with the health check
    }

    healthClient := grpc_health_v1.NewHealthClient(grpcPoolObj.Conn)
    resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{
        Service: "NodeService",
    })

    if err != nil || resp.Status != grpc_health_v1.HealthCheckResponse_SERVING {
        log.Println("ValidateObject failed:", err)
        return false
    }

    return true
}

// ActivateObject readies a pooled gRPC connection for use.
func (f *GRPCClientFactory) ActivateObject(ctx context.Context, object *pool.PooledObject) error {
    // A heartbeat request could be sent here to confirm the connection is alive.
    return nil
}

// PassivateObject returns a gRPC connection to the idle state.
func (f *GRPCClientFactory) PassivateObject(ctx context.Context, object *pool.PooledObject) error {
    // Connection state or caches could be reset here.
    return nil
}
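The factory plugs into go-commons-pool, which calls MakeObject lazily and ValidateObject on borrow when TestOnBorrow is set. A borrow/return sketch in package node_manager; the node address is an assumption:

// Sketch: one borrow/return cycle against a pooled NodeService connection.
func borrowOnceExample() error {
    ctx := context.Background()
    factory := NewGRPCClientFactory("127.0.0.1:40000") // assumed address
    p := pool.NewObjectPoolWithDefaultConfig(ctx, factory)

    obj, err := p.BorrowObject(ctx) // MakeObject runs on demand
    if err != nil {
        return err
    }
    defer p.ReturnObject(ctx, obj)

    grpcObj := obj.(*GRPCPoolObject)
    _ = grpcObj.Client // issue NodeService RPCs here
    return nil
}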
@ -0,0 +1,128 @@
package node_manager

import (
    "et_rpc"
    "fmt"
    "sync"
    "time"
)

const (
    Node_Load_Change_Threshold = 200
    Node_Load_Threshold        = 200               // node data-backlog threshold
    Node_Refresh_Interval      = 120 * time.Second // node info refresh interval
)

type LoadBalancer struct {
    nodes        []*NodeConnection
    nodeSelector INodeSelector // node selection strategy
    mu           sync.RWMutex
}

func NewLoadBalancer(selector INodeSelector) *LoadBalancer {
    lb := &LoadBalancer{
        nodes:        make([]*NodeConnection, 0),
        nodeSelector: selector,
        mu:           sync.RWMutex{},
    }

    // TODO start the health checker
    //go lb.HealthCheck()

    return lb
}

func (b *LoadBalancer) AddNode(node *NodeConnection) {
    b.mu.Lock()
    defer b.mu.Unlock()
    b.nodes = append(b.nodes, node)
}

func (b *LoadBalancer) RemoveNode(addr string) bool {
    b.mu.Lock()
    defer b.mu.Unlock()
    for i, node := range b.nodes {
        if node.Addr == addr {
            b.nodes = append(b.nodes[:i], b.nodes[i+1:]...)
            return true
        }
    }
    return false
}

func (b *LoadBalancer) SetSelector(selector INodeSelector) {
    b.mu.Lock()
    defer b.mu.Unlock()
    b.nodeSelector = selector
}

// SelectNode picks a node via the configured selector.
func (b *LoadBalancer) SelectNode() (*NodeConnection, error) {
    b.mu.RLock()
    defer b.mu.RUnlock()
    return b.nodeSelector.Select(b.nodes)
}

// UpdateNode applies event-driven node updates; isError forces an immediate update.
func (b *LoadBalancer) UpdateNode(nodeArgs *et_rpc.NodeArgs, isError bool) error {
    b.mu.Lock()
    defer b.mu.Unlock()

    // Unhealthy means status = NodeState_Unhealthy, or the load exceeds the threshold.
    for _, node := range b.nodes {
        if node.Addr == nodeArgs.Addr {
            isOverThreshold := abs(node.NArgs.Load-nodeArgs.Load) > Node_Load_Change_Threshold // load change exceeds threshold
            isTimeout := time.Since(node.lastUpdate) > Node_Refresh_Interval                   // refresh interval exceeded
            if isError || isOverThreshold || isTimeout {
                node.NArgs.Load = nodeArgs.Load
                node.NArgs.Status = et_rpc.NodeState_Healthy //TODO node.GetHealthStatus(nodeArgs)
                node.lastUpdate = time.Now()
            }
            return nil
        }
    }

    return fmt.Errorf("未注册的节点: %s", nodeArgs.Addr)
}

func (b *LoadBalancer) NodeExists(nodeAddr string) bool {
    b.mu.RLock()
    defer b.mu.RUnlock()
    for _, node := range b.nodes {
        if node.Addr == nodeAddr {
            return true
        }
    }
    return false
}

func abs(x int) int {
    if x < 0 {
        return -x
    }
    return x
}

// Periodic health check that refreshes node state.
//func (b *LoadBalancer) HealthCheck() {
//    for {
//        b.mu.Lock()
//        reply := new(et_rpc.NodeArgs)
//        for _, node := range b.nodes {
//            result := b.checkNodeHealth(node, reply)
//            b.UpdateNode(reply, result)
//        }
//        b.mu.Unlock()
//
//        time.Sleep(5 * time.Minute)
//    }
//}
//
//// Concrete health-check implementation.
//func (b *LoadBalancer) checkNodeHealth(conn *NodeConnection, reply *et_rpc.NodeArgs) bool {
//    // e.g. send a heartbeat request
//    err := conn.Call(context.Background(), et_rpc.RPCService_Node_Ping, &et_rpc.NodeArgs{}, reply)
//    return err == nil
//
//    // TODO use the returned CPU/memory/disk usage and backlog to judge node health
//}
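Putting the balancer together with the selector and NodeConnection (defined in the next file) looks roughly like this sketch in package node_manager; the node addresses are assumptions:

// Sketch: register two nodes and round-robin across them.
func buildBalancerExample() (*NodeConnection, error) {
    lb := NewLoadBalancer(&RoundRobinSelector{})
    for _, addr := range []string{"10.0.0.1:40000", "10.0.0.2:40000"} { // assumed addresses
        if conn, err := NewNodeConnection(&et_rpc.NodeArgs{Addr: addr}); err == nil {
            lb.AddNode(conn) // NewNodeConnection already health-checked the node
        }
    }
    return lb.SelectNode() // rotates across registered nodes
}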
@ -0,0 +1,186 @@
package node_manager

import (
    "context"
    "et_rpc"
    "et_rpc/pb"
    "fmt"
    "gitea.anxinyun.cn/container/common_models"
    pool "github.com/jolestar/go-commons-pool"
    "google.golang.org/grpc/health/grpc_health_v1"
    "log"
    "sync"
    "time"
)

type NodeConnection struct {
    Addr       string
    NArgs      *et_rpc.NodeArgs
    rpcPool    *pool.ObjectPool
    lastUpdate time.Time // when the node info was last refreshed
    ctx        context.Context
    mu         sync.Mutex
}

// NewNodeConnection creates a NodeConnection.
// TODO read the pool parameters from configuration.
func NewNodeConnection(args *et_rpc.NodeArgs) (*NodeConnection, error) {
    ctx := context.Background()
    factory := NewGRPCClientFactory(args.Addr)
    p := pool.NewObjectPoolWithDefaultConfig(ctx, factory)
    p.Config.MaxTotal = 400
    p.Config.MinIdle = 200
    p.Config.TestOnBorrow = true
    p.Config.TestOnReturn = false
    p.Config.TestWhileIdle = true                    // validate idle connections
    p.Config.MinEvictableIdleTime = 30 * time.Minute // minimum idle time before eviction
    //p.Config.SoftMinEvictableIdleTime = 15 * time.Minute // soft minimum idle time before eviction

    nodeConn := &NodeConnection{
        ctx:     ctx,
        Addr:    args.Addr,
        rpcPool: p,
        NArgs:   args,
    }

    // Borrow a connection for a quick smoke test.
    obj, err := nodeConn.rpcPool.BorrowObject(ctx)
    if err != nil {
        return nil, fmt.Errorf("建立RPC连接失败:%w", err)
    }
    defer nodeConn.rpcPool.ReturnObject(ctx, obj)

    grpcPoolObj, ok := obj.(*GRPCPoolObject)
    if !ok {
        log.Fatalf("类型断言失败,obj 不是 *GRPCPoolObject 类型")
    }

    // Health check.
    healthClient := grpc_health_v1.NewHealthClient(grpcPoolObj.Conn)
    resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{
        Service: "NodeService",
    })

    // resp.GetStatus() is nil-safe: resp is nil whenever err != nil.
    if err != nil || resp.GetStatus() != grpc_health_v1.HealthCheckResponse_SERVING {
        return nil, fmt.Errorf("健康检查失败: %v, 状态: %v", err, resp.GetStatus())
    }

    return nodeConn, nil
}

func (n *NodeConnection) GetHealthStatus(args *common_models.NodeArgs) et_rpc.NodeState {
    // TODO check whether CPU/memory/disk usage exceeds the thresholds
    //resourcesIsOver := true

    return et_rpc.NodeState_Healthy

    // Load above threshold, or a failed RPC call:
    //if args.Load > Node_Load_Threshold || args.Status == et_rpc.NodeState_Unhealthy {
    //    return et_rpc.NodeState_Unhealthy
    //} else {
    //    return et_rpc.NodeState_Healthy
    //}
}

// UpdateNodeArgs refreshes the node info.
//func (n *NodeConnection) UpdateNodeArgs(args *et_rpc.NodeArgs, forceUpdate bool) {
//    n.mu.Lock()
//    defer n.mu.Unlock()
//
//    // Decide whether the node info needs refreshing.
//    isOverThreshold := abs(n.NArgs.Load-args.Load) > Node_Load_Change_Threshold // load change exceeds threshold
//    isTimeout := time.Since(n.lastUpdate) > Node_Refresh_Interval               // refresh interval exceeded
//
//    if forceUpdate || isOverThreshold || isTimeout {
//        // Refresh the node info.
//        n.NArgs.Load = args.Load
//        n.NArgs.Status = n.GetHealthStatus(args)
//        n.lastUpdate = time.Now()
//    }
//}

func (n *NodeConnection) CallHandleIotaData(id string, messages []string) error {
    // Create a fresh context with a timeout.
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
    defer cancel()

    // Borrow a connection from the pool.
    obj1, err := n.rpcPool.BorrowObject(ctx)
    if err != nil {
        return fmt.Errorf("gRPC[HandleIotaData] 借用对象错误: %w", err)
    }

    // Use the borrowed connection.
    rpcPoolObj, ok := obj1.(*GRPCPoolObject)
    if !ok {
        log.Fatalf("类型断言失败,obj1 不是 *GRPCPoolObject 类型")
    }

    defer func() {
        if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
            log.Printf("gRPC[HandleIotaData] 归还对象到连接池失败: %v", err)
        }
    }()

    // Make the RPC call.
    request := &pb.HandleDataRequest{
        Id:       id,
        Messages: messages,
    }

    startTime := time.Now()
    _, err = rpcPoolObj.Client.HandleIotaData(ctx, request)
    duration := time.Since(startTime)

    if err != nil {
        log.Printf("调用失败。gRPC[HandleIotaData] 错误: %v, 耗时: %v", err, duration)
        return fmt.Errorf("调用失败。gRPC[HandleIotaData] 错误: %w", err)
    }

    //log.Printf("调用成功。gRPC[HandleIotaData] resp=%+v, 耗时: %v", resp, duration)

    return nil
}

func (n *NodeConnection) CallHandleAggData(id string, messages []string) error {
    // Create a fresh context with a timeout.
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
    defer cancel()

    // Borrow a connection from the pool.
    obj1, err := n.rpcPool.BorrowObject(ctx)
    if err != nil {
        return fmt.Errorf("gRPC[HandleAggData] 借用对象错误: %w", err)
    }

    // Use the borrowed connection.
    rpcPoolObj, ok := obj1.(*GRPCPoolObject)
    if !ok {
        log.Fatalf("类型断言失败,obj1 不是 *GRPCPoolObject 类型")
    }

    defer func() {
        if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
            log.Printf("gRPC[HandleAggData] 归还对象到连接池失败: %v", err)
        }
    }()

    // Make the RPC call.
    request := &pb.HandleDataRequest{
        Id:       id,
        Messages: messages,
    }

    startTime := time.Now()
    _, err = rpcPoolObj.Client.HandleAggData(ctx, request)
    duration := time.Since(startTime)

    if err != nil {
        log.Printf("调用失败。gRPC[HandleAggData] 错误: %v, 耗时: %v", err, duration)
        return fmt.Errorf("调用失败。gRPC[HandleAggData] 错误: %w", err)
    }

    //log.Printf("调用成功。gRPC[HandleAggData] resp=%+v, 耗时: %v", resp, duration)

    return nil
}
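A dispatch sketch tying the selector and the call path together, in package node_manager; the key and payload are assumptions:

// Sketch: pick a node and forward one raw-data batch to it.
func dispatchExample(lb *LoadBalancer) {
    node, err := lb.SelectNode()
    if err != nil {
        log.Println(err)
        return
    }
    if err := node.CallHandleIotaData("struct-1001", []string{`{"raw":1}`}); err != nil {
        // A failure here could feed LoadBalancer.UpdateNode(node.NArgs, true).
        log.Printf("dispatch failed: %v", err)
    }
}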
@ -0,0 +1,58 @@
package node_manager

import (
    "et_rpc"
    "log"
)

// NodeManager and the rpcPool provide node management and RPC connection pooling,
// supporting high concurrency and connection reuse.
type NodeManager struct {
    loadBalancer *LoadBalancer
}

func NewNodeManager(lb *LoadBalancer) *NodeManager {
    return &NodeManager{
        loadBalancer: lb,
    }
}

func (m *NodeManager) AddNode(args *et_rpc.NodeArgs) error {
    nodeConn, err := NewNodeConnection(args)
    if err != nil {
        log.Printf("添加Node节点失败:%s\n", err)
        return err // return early: nodeConn is nil here and must not be dereferenced
    }

    m.loadBalancer.AddNode(nodeConn)
    log.Printf("添加Node节点: %s\n", args.Addr)

    log.Printf("master共有 %d 个节点", m.NodesCount())
    for _, node := range m.loadBalancer.nodes {
        log.Printf("master -> Node[%s] 的空闲连接有 %d 个。", node.Addr, node.rpcPool.GetNumIdle())
    }

    return nil
}

func (m *NodeManager) RemoveNode(addr string) bool {
    return m.loadBalancer.RemoveNode(addr)
    //log.Printf("删除Node节点: %s\n", addr)
}

// UpdateNode refreshes node info; isError forces an immediate update.
func (m *NodeManager) UpdateNode(nodeArgs *et_rpc.NodeArgs, isError bool) error {
    return m.loadBalancer.UpdateNode(nodeArgs, isError)
}

func (m *NodeManager) NodeExists(nodeAddr string) bool {
    return m.loadBalancer.NodeExists(nodeAddr)
}

func (m *NodeManager) GetNodeConnection() (*NodeConnection, error) {
    return m.loadBalancer.SelectNode()
}

func (m *NodeManager) NodesCount() int {
    return len(m.loadBalancer.nodes)
}
@ -0,0 +1,56 @@
package node_manager

import (
    "fmt"
    "sync/atomic"
)

type INodeSelector interface {
    Select(nodes []*NodeConnection) (*NodeConnection, error)
}

type RoundRobinSelector struct {
    index int32
}

func (s *RoundRobinSelector) Select(nodes []*NodeConnection) (*NodeConnection, error) {
    if len(nodes) == 0 {
        return nil, fmt.Errorf("没有可用的节点")
    }

    // Atomically read the current index.
    currentIndex := atomic.LoadInt32(&s.index)
    selectedNode := nodes[currentIndex%int32(len(nodes))]

    // TODO node health is not checked for now.
    s.UpdateIndex() // advance the index
    if selectedNode == nil {
        return nil, fmt.Errorf("无此索引的节点。%d", currentIndex)
    }

    return selectedNode, nil

    //if selectedNode.NArgs.Status == et_rpc.NodeState_Healthy {
    //    s.UpdateIndex() // node is healthy, advance the index
    //    return selectedNode, nil
    //}

    // If the current node is unhealthy, look for the next healthy one:
    //for i := 1; i < len(nodes); i++ { // start from the next node
    //    nextIndex := (currentIndex + int32(i)) % int32(len(nodes))
    //    selectedNode = nodes[nextIndex]
    //    if selectedNode.NArgs.Status == et_rpc.NodeState_Healthy {
    //        s.UpdateIndex() // found a healthy node, advance the index
    //        return selectedNode, nil
    //    }
    //}
    //
    //// No healthy node: reset the index and return an error.
    //atomic.StoreInt32(&s.index, 0)
    //return nil, fmt.Errorf("所有节点都不健康")
}

// UpdateIndex advances the round-robin index.
func (s *RoundRobinSelector) UpdateIndex() {
    atomic.AddInt32(&s.index, 1) // atomic increment
}
@ -1,85 +0,0 @@
package agg_worker

import (
    "et_analyze"
    "gitea.anxinyun.cn/container/common_models"
    "github.com/google/uuid"
    "log"
    "net/rpc"
    "node/et_worker/et_recv"
    "os"
    "time"
)

type AggNode struct {
    recvDataHandler *et_recv.RecvDataHanler
}

func NewAggWorker() *AggNode {
    return &AggNode{
        recvDataHandler: et_recv.NewRecvDataHanler(),
    }
}

// Handler is the RPC entry point invoked remotely by the master.
func (the *AggNode) Handler(aggData common_models.AggData, replay *bool) error {
    *replay = true
    err := the.ConsumerProcess(&aggData)
    if err != nil {
        return err
    }
    return nil
}

// ConsumerProcess runs the aggregate-threshold analysis.
func (the *AggNode) ConsumerProcess(aggData *common_models.AggData) error {
    aggHandler := et_analyze.NewAggThresholdHandler()
    aggHandler.ProcessData(aggData)
    log.Printf("rpc聚集阈值分析[%d]-time[%s]-[%v]", aggData.SensorId, aggData.Date, aggData.Agg)
    return nil
}

// RegisterToMaster calls the master's published RPC method master.NodeRegister.
func (the *AggNode) RegisterToMaster() {
    connectCount := 0
    for {
        connectCount++
        if connectCount > 3 {
            log.Printf("RegisterToMaster 失败 超过%d次,准备退出", connectCount-1)
            time.Sleep(time.Second * 10)
            os.Exit(1)
        }
        masterAddr := os.Getenv("masterAddr")
        if masterAddr == "" {
            masterAddr = "127.0.0.1:50000"
        }

        time.Sleep(time.Second * 1)
        master, err := rpc.Dial("tcp", masterAddr)
        if err != nil {
            log.Printf("链接失败-> node[%s]", masterAddr)
            continue
        }

        //todo fetch the node's own address
        nodeAddr := "127.0.0.1:40001"
        status := `{"health_status":"healthy","load_average":{"1_min":0.75,"5_min":1.2,"15_min":0.9},"availability":"available","last_check_time":"2022-01-01T12:00:00Z"}`
        resources := `{"cpu":{"cores":4,"usage":"50%","temperature":"60°C"},"memory":{"total":"8GB","used":"4GB","available":"4GB"},"storage":{"total":"256GB","used":"100GB","available":"156GB"}}`
        nodeArgs := &common_models.NodeArgs{
            ID:        uuid.New().String(),
            NodeType:  "aggNode",
            Status:    status,
            Resources: resources,
            Addr:      nodeAddr,
            ThingIds:  []string{},
        }

        var result bool
        err = master.Call("master.NodeRegister", &nodeArgs, &result)
        if err != nil {
            log.Printf("node[%s]注册到master[%s]异常:%v", masterAddr, nodeAddr, result)
            continue
        }
        break
    }
}
@ -1,273 +1,306 @@
package app

import (
    "context"
    "et_analyze"
    "et_rpc"
    "et_rpc/pb"
    "fmt"
    "gitea.anxinyun.cn/container/common_models"
    "gitea.anxinyun.cn/container/common_utils"
    "gitea.anxinyun.cn/container/common_utils/configLoad"
    "google.golang.org/grpc"
    "google.golang.org/grpc/health"
    "google.golang.org/grpc/health/grpc_health_v1"
    "log"
    "net/rpc"
    "node/et_worker/et_recv"
    "net"
    "os"
    "os/signal"
    "strings"
    "sync"
    "syscall"
    "time"
)

type EtNode struct {
    nodeInfo          *common_models.NodeArgs
    master            *rpcMaster
    ch                chan *common_models.ProcessData
    recvDataHandler   *et_recv.RecvDataHanler
    aggAnalyzeHandler *et_analyze.AggThresholdHandler
type ETNode struct {
    // node RPC server state
    grpcServer        *grpc.Server
    nodeServer        *NodeServiceServer
    grpcServerStarted chan struct{} // signals the main program that the RPC server is up
    processChannels   []chan []*common_models.ProcessData
    groupDataChan     chan []*common_models.ProcessData // grouped data
    // node info
    nodeInfo *et_rpc.NodeArgs
    Addr     string
    // master info
    masterAddr string
    masterConn *MasterConnection
}

type rpcMaster struct {
    conn *rpc.Client
    addr string
}
func NewETNode() *ETNode {
    const processChannelsCount = 1
    processBufSize := configLoad.LoadConfig().GetInt("performance.node.processBufSize")
    processChannels := make([]chan []*common_models.ProcessData, processChannelsCount)
    for i := 0; i < processChannelsCount; i++ {
        processChannels[i] = make(chan []*common_models.ProcessData, processBufSize)
    }
    nodeServer := NewNodeServer(processChannels)

    grpcServer := grpc.NewServer()
    pb.RegisterNodeServiceServer(grpcServer, nodeServer)

const chSize = 1
    // create the gRPC health-check service
    healthServer := health.NewServer()
    grpc_health_v1.RegisterHealthServer(grpcServer, healthServer)
    // set the initial health status
    healthServer.SetServingStatus("NodeService", grpc_health_v1.HealthCheckResponse_SERVING)

func NewEtWorker() *EtNode {
    node := &EtNode{
        ch:                make(chan *common_models.ProcessData, chSize),
        recvDataHandler:   et_recv.NewRecvDataHanler(),
        aggAnalyzeHandler: et_analyze.NewAggThresholdHandler(),
    m := &ETNode{
        grpcServer:        grpcServer,
        nodeServer:        nodeServer,
        grpcServerStarted: make(chan struct{}),
        processChannels:   processChannels,
        //groupDataChan:   make(chan []*common_models.ProcessData, 500),
    }
    node.exitMonitor()
    node.heartMonitor()
    return node

    // initialize the node info
    m.nodeInfo = m.initNodeInfo()

    return m
}

// IotaDataHandler is an RPC service method invoked remotely by the master.
func (the *EtNode) IotaDataHandler(iotaData common_models.IotaData, reply *bool) error {
    *reply = true
    err := the.ConsumerProcess(&iotaData)
func (n *ETNode) startRPCServer() {
    port := configLoad.LoadConfig().GetUint16("node.port")
    listener, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
    if err != nil {
        *reply = false
        log.Panicf("启动 Node RPC 服务失败:%v", err)
    }
    return err
}
    defer listener.Close()
    log.Printf("启动 Node RPC 服务成功,服务端口:%d", port)

// isSettleData reports whether this is settlement test data.
func isSettleData(data map[string]interface{}) bool {
    // {"pressure":23.09,"temperature":24.93,"ssagee":16.44}
    validKeys := map[string]bool{
        "pressure":    true,
        "temperature": true,
        "ssagee":      true,
    time.Sleep(100 * time.Millisecond)
    close(n.grpcServerStarted)

    // start the gRPC server
    if err := n.grpcServer.Serve(listener); err != nil {
        log.Panicf("gRPC 服务器服务失败:%v", err)
    }
}

    if len(data) != 3 {
        return false
func (n *ETNode) initNodeInfo() *et_rpc.NodeArgs {
    // read the host IP-address prefix
    ipPrefix := configLoad.LoadConfig().GetString("node.hostIpPrefix")
    ip4 := common_utils.ReadIP4WithPrefixFirst(ipPrefix)
    // read the host name
    hostName, err := os.Hostname()
    if err != nil {
        log.Fatalf("获取主机名失败: %v", err)
    }
    // read the configured port
    log.Printf("node [%s] ip=%s\n", hostName, ip4)
    port := configLoad.LoadConfig().GetUint16("node.port")
    if port == 0 {
        log.Fatalf("未配置有效的端口号")
    }
    // build the node's address
    nodeAddr := fmt.Sprintf("%s:%d", ip4, port)
    log.Printf("node 的地址为 %s", nodeAddr)
    n.Addr = nodeAddr

    for key := range data {
        if !validKeys[key] {
            return false
        }
    // initialize the node info
    return &et_rpc.NodeArgs{
        ID:           hostName + time.Now().Format("_20060102_150405"),
        Status:       et_rpc.NodeState_Healthy,
        ErrCode:      et_rpc.RPCReply_Success,
        ResourceJson: "",
        Addr:         nodeAddr,
    }
    return true
}

// ConsumerProcess turns IotaData into ProcessData.
func (the *EtNode) ConsumerProcess(iotaData *common_models.IotaData) error {
    // record the start time
    startTime := time.Now()

    //TODO #TEST BEGIN static level gauge test (single-point formula calculation is currently broken; kept so the settlement group-calculation test can run)
    //if !isSettleData(iotaData.Data.Data) {
    //    return nil
    //}
    // #TEST END
func (n *ETNode) connectAndRegisterToMaster() {
    // read the master configuration
    masterHost := configLoad.LoadConfig().GetString("node.remoteMasterHost")
    masterPort := configLoad.LoadConfig().GetUint16("master.port")
    if masterHost == "" {
        masterHost = "127.0.0.1"
    }
    if masterPort == 0 {
        masterPort = 50000
    }
    masterAddr := fmt.Sprintf("%s:%d", masterHost, masterPort)

    deviceData, err := the.recvDataHandler.OnDataHandler(*iotaData)
    // the node establishes its connection to the master
    masterConn, err := NewMasterConnection(masterAddr)
    if err != nil {
        return err
        log.Printf("ERROR: 建立与 Master[%s] 的连接失败!!\n", masterAddr)
    } else {
        n.masterConn = masterConn
        log.Printf("建立与 Master[%s] 的连接成功!\n", masterAddr)
    }

    if deviceData == nil {
        return nil
    }
    n.masterAddr = masterAddr
    n.masterConn = masterConn

    log.Printf("rpc处理设备数据[%s]-time[%v]-[%v]", deviceData.DeviceId, deviceData.AcqTime, deviceData.Raw)
    time.Sleep(500 * time.Millisecond)

    the.ch <- &common_models.ProcessData{
        DeviceData: *deviceData,
        Stations:   []common_models.Station{},
    // the node sends its registration request (calling the master's register service)
    err = n.register() // up to 3 attempts
    if err != nil {
        // exit after 3 failed attempts
        log.Fatalf("node[%s]->master[%s] 注册失败,Error: %v", n.nodeInfo.Addr, masterAddr, err)
        //log.Println(err)
    }

    defer func() {
        duration := time.Since(startTime)
        log.Printf("ConsumerProcess(iotaData *common_models.IotaData)执行时长: %v", duration)
    }()
    return nil
}

// AggDataHandler runs aggregate-threshold analysis; invoked remotely by the master.
func (the *EtNode) AggDataHandler(aggData common_models.AggData, reply *bool) error {
    *reply = true
    err := the.aggAnalyzeHandler.ProcessData(&aggData)
    if err != nil {
        errmsg := fmt.Sprintf("[etNode.AggDataHandler]变化速率阈值分析%s[aggTypeId:%d]ERROR: %v", aggData.R(), aggData.AggTypeId, err)
        log.Println(errmsg)
        return err
func (n *ETNode) register() error {
    if n.masterConn == nil {
        return fmt.Errorf("n.masterConn is nil")
    }

    log.Printf("[etNode.AggDataHandler]变化速率阈值分析SUCCESS。%s[aggTypeId:%d]changed[%v]", aggData.R(), aggData.AggTypeId, aggData.Changed)
    return nil
}
    //n.nodeInfo.Load = len(n.chIotaData)
    const maxRetries = 3
    retries := 0

// Process implements the source interface.
func (the *EtNode) Process(ctx context.Context) (<-chan any, error) {
    source := make(chan any, chSize)
    go func() {
        defer close(source)
        for {
            select {
            case a := <-the.ch:
                source <- a
                log.Printf("存储数据=>source out,len=%d,%d", len(source), len(the.ch))
            case <-ctx.Done():
                log.Println("退出[source] EtNode.Process")
                return
            }
    for {
        err := n.masterConn.CallRegister(n.nodeInfo)
        if err == nil {
            log.Println("注册成功")
            return nil // registration succeeded
        } else {
            log.Printf("注册失败: %v", err)
            retries++
        }

        if retries >= maxRetries {
            log.Println("达到最大重试次数,停止注册尝试")
            return fmt.Errorf("注册失败,达到最大重试次数: %v", err)
        }
    }()
    return source, nil

        // send a registration message every 5 seconds
        time.Sleep(5 * time.Second)
    }
}

func (n *ETNode) unregister() {
    reply := new(et_rpc.NodeArgs)
    err := n.masterConn.CallUnregister()
    if err != nil {
        log.Printf("node[%s] 从master注销异常,err=%v", n.nodeInfo.Addr, err.Error())
    } else {
        log.Printf("node[%s] 从master注销成功。reply=%+v", n.nodeInfo.Addr, reply)
    }
}

// RegisterToMaster calls the master's published RPC method master.NodeRegister.
func (the *EtNode) RegisterToMaster() {
    maxCount := 3
    connectCount := 0
func (n *ETNode) heartbeat(interval time.Duration) {
    // after 3 failed heartbeats, re-send the registration message
    const maxRetries = 3
    retries := 0
    var err error

    for {
        connectCount++
        if connectCount > maxCount {
            log.Printf("RegisterToMaster 失败 超过%d次,准备退出", maxCount)
            time.Sleep(time.Second * 10)
            os.Exit(1)
        }
        masterAddr := loadMasterAddr()
        masterConn, err := rpc.Dial("tcp", masterAddr)
        if err != nil {
            log.Printf("链接失败-> node[%s]", masterAddr)
            time.Sleep(time.Second * 5)
        if n.masterConn == nil {
            log.Println("ERROR: masterConn is nil")
            time.Sleep(1 * time.Second)
            continue
        }
        the.master = &rpcMaster{
            conn: masterConn,
            addr: masterAddr,
        }
        time.Sleep(time.Millisecond * 200)
        // get the node's own address
        ipPrefix := configLoad.LoadConfig().GetString("node.hostIpPrefix")
        ip4 := common_utils.ReadIP4WithPrefixFirst(ipPrefix)
        hostName, err := os.Hostname()
log.Printf("node [%s] ip=%s\n", hostName, ip4) |
|||
port := configLoad.LoadConfig().GetUint16("node.port") |
|||
callNodeAddr := fmt.Sprintf("%s:%d", ip4, port) |
|||
|
|||
if the.nodeInfo == nil { |
|||
the.nodeInfo = &common_models.NodeArgs{ |
|||
ID: hostName + time.Now().Format("_20060102_150405"), |
|||
NodeType: "etNode", |
|||
Status: "", |
|||
Resources: "", |
|||
Addr: callNodeAddr, |
|||
ThingIds: []string{}, |
|||
|
|||
err = n.masterConn.CallHeartbeatNode(n.nodeInfo.Addr) |
|||
if err == nil { |
|||
log.Println("心跳成功!!") |
|||
retries = 0 |
|||
} else { |
|||
log.Printf("心跳消息发送失败: %v", err) |
|||
if strings.Contains(err.Error(), "未注册的节点") { |
|||
retries = 3 |
|||
} else { |
|||
retries++ |
|||
} |
|||
} |
|||
|
|||
var result bool |
|||
err = the.master.conn.Call("master.NodeRegister", the.nodeInfo, &result) |
|||
if err != nil { |
|||
log.Printf("node[%s] 注册到 master[%s]异常:%v", the.nodeInfo.Addr, the.master.addr, result) |
|||
continue |
|||
} |
|||
break |
|||
} |
|||
} |
|||
func (the *EtNode) heartToMaster() { |
|||
maxCount := 3 |
|||
connectCount := 0 |
|||
reRegister := false |
|||
for { |
|||
connectCount++ |
|||
if connectCount > maxCount { |
|||
log.Printf("heartToMaster 失败 超过%d次", maxCount) |
|||
reRegister = true |
|||
break |
|||
} |
|||
var result bool |
|||
err := the.master.conn.Call("master.NodeHeart", the.nodeInfo, &result) |
|||
if err != nil { |
|||
log.Printf("node[%s] 心跳到 master[%s]异常:%v", the.nodeInfo.Addr, the.master.addr, result) |
|||
time.Sleep(time.Second * 5) |
|||
continue |
|||
if retries >= maxRetries { |
|||
// 发送注注册、重置重试计数
|
|||
err = n.register() |
|||
if err == nil { |
|||
log.Println("重新注册成功", n.Addr) |
|||
} else { |
|||
log.Println("重新注册失败", n.Addr) |
|||
} |
|||
|
|||
retries = 0 |
|||
} |
|||
break |
|||
} |
|||
if reRegister { //触发重新注册
|
|||
log.Printf("node[%s] 心跳失败触发-重新注册到 master[%s]", the.nodeInfo.Addr, the.master.addr) |
|||
the.RegisterToMaster() |
|||
} |
|||
} |
|||
|
|||
func (the *EtNode) UnRegisterToMaster() { |
|||
var result bool |
|||
if err := the.master.conn.Call("master.NodeUnRegister", the.nodeInfo, &result); err != nil { |
|||
log.Printf("node[%s] 从master注销,异常:%v", the.nodeInfo.Addr, err.Error()) |
|||
} else { |
|||
log.Printf("node[%s] 从master注销,结果:%v", the.nodeInfo.Addr, result) |
|||
// 每60秒发送一次心跳
|
|||
time.Sleep(interval) |
|||
} |
|||
} |
|||
func (the *EtNode) exitMonitor() { |
|||
go func() { |
|||
c := make(chan os.Signal, 1) |
|||
// 通过signal.Notify函数将信号通道c注册到系统相关的退出信号上
|
|||
// 这里使用了两个退出信号:syscall.SIGINT(Ctrl+C)和syscall.SIGTERM(系统发送的退出信号)
|
|||
signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGKILL) |
|||
// 阻塞等待接收信号
|
|||
s := <-c |
|||
log.Printf("接收到退出信号:%v,进行清理工作", s) |
|||
the.UnRegisterToMaster() |
|||
time.Sleep(3 * time.Second) |
|||
os.Exit(0) |
|||
}() |
|||
|
|||
func (n *ETNode) startMonitorExit() { |
|||
c := make(chan os.Signal, 1) |
|||
// 通过signal.Notify函数将信号通道c注册到系统相关的退出信号上
|
|||
// 这里使用了两个退出信号:syscall.SIGINT(Ctrl+C)和syscall.SIGTERM(系统发送的退出信号)
|
|||
signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGKILL) |
|||
// 阻塞等待接收信号
|
|||
s := <-c |
|||
log.Printf("接收到退出信号:%v,进行清理工作", s) |
|||
|
|||
// 注销
|
|||
n.unregister() |
|||
|
|||
// 等待所有数据处理完毕
|
|||
n.waitForDataProcessing() |
|||
log.Printf("退出前,通道中数据已经处理完毕!!") |
|||
|
|||
os.Exit(0) |
|||
} |
|||
func (the *EtNode) heartMonitor() { |
|||
|
|||
func (n *ETNode) waitForDataProcessing() { |
|||
const waitInterval = 10 * time.Second |
|||
var wg sync.WaitGroup |
|||
|
|||
// 处理 IotaData 通道
|
|||
wg.Add(1) |
|||
go func() { |
|||
ticker := time.NewTicker(time.Minute) |
|||
defer ticker.Stop() |
|||
for range ticker.C { |
|||
if the.master != nil { |
|||
log.Printf("node[%s] 心跳触发-> master[%s]", the.nodeInfo.Addr, the.master.addr) |
|||
the.heartToMaster() |
|||
defer wg.Done() |
|||
for _, ch := range n.processChannels { |
|||
for len(ch) > 0 { |
|||
time.Sleep(waitInterval) |
|||
} |
|||
} |
|||
}() |
|||
|
|||
// 处理 groupDataChan 通道
|
|||
wg.Add(1) |
|||
go func() { |
|||
defer wg.Done() |
|||
for len(n.groupDataChan) > 0 { |
|||
time.Sleep(waitInterval) |
|||
} |
|||
}() |
|||
|
|||
// 等待所有 goroutine 完成
|
|||
wg.Wait() |
|||
} |
|||
|
|||
// LoadCh test用
|
|||
func (the *EtNode) LoadCh() chan *common_models.ProcessData { |
|||
return the.ch |
|||
} |
|||
//func LogProcessDataTimeCost(nodeId, deviceId string, start time.Time) {
|
|||
// tc := time.Since(start)
|
|||
// log.Printf("******** [%s][%s]装载设备信息耗时: %v", nodeId, deviceId, tc)
|
|||
//}
|
|||
|
|||
func loadMasterAddr() string { |
|||
masterHost := configLoad.LoadConfig().GetString("node.remoteMasterHost") |
|||
masterPort := configLoad.LoadConfig().GetUint16("master.port") |
|||
if masterHost == "" { |
|||
masterHost = "127.0.0.1" |
|||
} |
|||
if masterPort == 0 { |
|||
masterPort = 50000 |
|||
} |
|||
return fmt.Sprintf("%s:%d", masterHost, masterPort) |
|||
} |
|||
// 是沉降测试数据
|
|||
//func isSettleData(data map[string]interface{}) bool {
|
|||
// // {"pressure":23.09,"temperature":24.93,"ssagee":16.44}
|
|||
// validKeys := map[string]bool{
|
|||
// "pressure": true,
|
|||
// "temperature": true,
|
|||
// "ssagee": true,
|
|||
// }
|
|||
//
|
|||
// if len(data) != 3 {
|
|||
// return false
|
|||
// }
|
|||
//
|
|||
// for key := range data {
|
|||
// if !validKeys[key] {
|
|||
// return false
|
|||
// }
|
|||
// }
|
|||
// return true
|
|||
//}
|
|||
|
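Taken together, the new ETNode methods above form the node's lifecycle: start the gRPC server, connect and register with the master, heart-beat periodically, and clean up on exit. The sketch below shows one plausible wiring; NewETNode, the field set, and the 60-second interval are assumptions for illustration, not code from this change.

// Hypothetical startup wiring for the new ETNode (NewETNode is an assumed
// constructor; only the methods shown above are real).
func runNode() {
	n := NewETNode()               // assumed constructor
	n.nodeInfo = n.initNodeInfo()  // build the NodeArgs (ID, Addr, status)
	go n.startRPCServer()          // serve gRPC on node.port
	<-n.grpcServerStarted          // closed once the listener is up
	n.connectAndRegisterToMaster() // dial the master and register (3 attempts)
	go n.heartbeat(60 * time.Second)
	n.startMonitorExit()           // blocks until an exit signal, then cleans up
}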
@ -0,0 +1,242 @@
package app

import (
	"context"
	"et_rpc"
	"et_rpc/pb"
	"fmt"
	pool "github.com/jolestar/go-commons-pool"
	"google.golang.org/grpc/health/grpc_health_v1"
	"log"
	"sync"
	"time"
)

type MasterConnection struct {
	MasterAddr string
	Addr       string
	NArgs      *et_rpc.NodeArgs
	rpcPool    *pool.ObjectPool
	lastUpdate time.Time // when the node info was last updated
	ctx        context.Context
	mu         sync.Mutex
	factory    *MasterGRPCClientFactory
}

// TODO NewMasterConnection: read the pool parameters from the config file.
func NewMasterConnection(masterAddr string) (*MasterConnection, error) {
	ctx := context.Background()
	factory := NewMasterGRPCClientFactory(masterAddr)
	p := pool.NewObjectPoolWithDefaultConfig(ctx, factory)
	p.Config.MaxTotal = 10 // maximum number of connections
	p.Config.MaxIdle = 5   // maximum number of idle connections
	p.Config.MinIdle = 1   // minimum number of idle connections
	p.Config.TestOnBorrow = true
	p.Config.TestOnReturn = false
	p.Config.TestWhileIdle = true                    // validate connections while idle
	p.Config.MinEvictableIdleTime = 30 * time.Minute // minimum idle time before eviction
	//p.Config.SoftMinEvictableIdleTime = 15 * time.Minute // soft minimum idle time before eviction

	conn := &MasterConnection{
		ctx:        ctx,
		MasterAddr: masterAddr,
		rpcPool:    p,
	}

	// Borrow a connection for a simple smoke test
	obj, err := conn.rpcPool.BorrowObject(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to establish the RPC connection: %w", err)
	}
	defer conn.rpcPool.ReturnObject(ctx, obj)

	grpcPoolObj, ok := obj.(*MasterGRPCPoolObject)
	if !ok {
		log.Fatalf("type assertion failed: obj is not a *MasterGRPCPoolObject")
	}

	// Health check (check err before resp, which is nil on error)
	healthClient := grpc_health_v1.NewHealthClient(grpcPoolObj.Conn)
	resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{
		Service: "MasterService",
	})
	if err != nil {
		return nil, fmt.Errorf("health check failed: %w", err)
	}
	if resp.Status != grpc_health_v1.HealthCheckResponse_SERVING {
		return nil, fmt.Errorf("health check failed, status: %v", resp.Status)
	}

	conn.factory = factory
	return conn, nil
}

func (n *MasterConnection) BorrowValidConnection(ctx context.Context) (*pool.PooledObject, error) {
	var obj1 interface{}
	var err error

	// Try to borrow a connection, retrying up to 3 times
	for attempts := 0; attempts < 3; attempts++ {
		obj1, err = n.rpcPool.BorrowObject(ctx)
		if err == nil {
			break
		}
		log.Printf("Attempt %d: Failed to borrow object from pool: %v", attempts+1, err)
		time.Sleep(1 * time.Second)
	}

	if err != nil {
		return nil, fmt.Errorf("borrow object error after 3 attempts: %w", err)
	}

	// BorrowObject hands back the underlying object, not the pool wrapper,
	// so assert the concrete type and re-wrap it for the factory's hooks.
	grpcObj, ok := obj1.(*MasterGRPCPoolObject)
	if !ok {
		return nil, fmt.Errorf("invalid object type from pool")
	}
	pooledObject := pool.NewPooledObject(grpcObj)

	if !n.factory.ValidateObject(ctx, pooledObject) {
		if err = n.factory.DestroyObject(ctx, pooledObject); err != nil {
			return nil, err
		}

		pooledObject, err = n.factory.MakeObject(ctx)
		if err != nil {
			return nil, err
		}
	}

	return pooledObject, nil
}

func (n *MasterConnection) CallRegister(nodeInfo *et_rpc.NodeArgs) error {
	n.NArgs = nodeInfo

	// Create a fresh context with a timeout
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	// Borrow a connection from the pool
	obj1, err := n.rpcPool.BorrowObject(ctx)
	if err != nil {
		return fmt.Errorf("gRPC[CallRegister] borrow object error: %w", err)
	}

	rpcPoolObj := obj1.(*MasterGRPCPoolObject)

	// The deferred function returns the connection to the pool
	defer func() {
		if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
			log.Printf("gRPC[CallRegister] failed to return object to the pool: %v", err)
		}
	}()

	// Make the RPC call
	request := &pb.NodeRequest{
		Id:       fmt.Sprintf("master-%s", n.MasterAddr),
		Address:  nodeInfo.Addr,
		ThingIds: make([]string, 0),
	}
	resp, err := rpcPoolObj.Client.RegisterNode(ctx, request)
	if err != nil {
		return fmt.Errorf("gRPC[CallRegister] call error: %w", err)
	}
	log.Printf("gRPC[CallRegister] resp=%+v, err=%+v\n", resp, err)

	return nil
}

func (n *MasterConnection) CallUnregister() error {
	// Create a fresh context with a timeout
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	// Borrow a connection from the pool
	obj1, err := n.rpcPool.BorrowObject(ctx)
	if err != nil {
		return fmt.Errorf("gRPC[CallUnregister] borrow object error: %w", err)
	}

	rpcPoolObj := obj1.(*MasterGRPCPoolObject)

	// The deferred function returns the connection to the pool
	// (previously this method also returned the object explicitly, returning it twice)
	defer func() {
		if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
			log.Printf("gRPC[CallUnregister] failed to return object to the pool: %v", err)
		}
	}()

	// Make the RPC call
	request := &pb.NodeRequest{
		Id:       "",
		Address:  n.Addr,
		ThingIds: make([]string, 0),
	}
	resp, err := rpcPoolObj.Client.UnregisterNode(ctx, request)
	if err != nil {
		return fmt.Errorf("gRPC[CallUnregister] call error: %w", err)
	}
	log.Printf("gRPC[CallUnregister] resp=%+v, err=%+v\n", resp, err)

	return nil
}

func (n *MasterConnection) CallHeartbeatNode(nodeAddr string) error {
	// Create a fresh context with a timeout
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	// Borrow a connection from the pool
	obj1, err := n.rpcPool.BorrowObject(ctx)
	if err != nil {
		return fmt.Errorf("gRPC[CallHeartbeatNode] borrow object error: %w", err)
	}

	rpcPoolObj := obj1.(*MasterGRPCPoolObject)

	// The deferred function returns the connection to the pool
	defer func() {
		if err := n.rpcPool.ReturnObject(ctx, obj1); err != nil {
			log.Printf("gRPC[CallHeartbeatNode] failed to return object to the pool: %v", err)
		}
		log.Printf("gRPC[CallHeartbeatNode] object obj1 returned.")
	}()

	// Make the RPC call
	request := &pb.NodeRequest{
		Id:       "",
		Address:  nodeAddr,
		ThingIds: make([]string, 0),
	}
	resp, err := rpcPoolObj.Client.HeartbeatNode(ctx, request)
	if err != nil {
		return fmt.Errorf("gRPC[CallHeartbeatNode] call error: %w", err)
	}
	log.Printf("gRPC[CallHeartbeatNode] resp=%+v, err=%+v\n", resp, err)

	return nil
}
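A minimal usage sketch of MasterConnection, assumed to live in the same package; the addresses and NodeArgs values are placeholders. Note that NewMasterConnection already performs a health check, so the master must be serving when it is called.

// Hedged usage sketch; error handling is minimal and the values are placeholders.
func exampleMasterConnection() error {
	conn, err := NewMasterConnection("127.0.0.1:50000") // builds the pool and health-checks it
	if err != nil {
		return err
	}
	args := &et_rpc.NodeArgs{Addr: "127.0.0.1:40000"}
	if err := conn.CallRegister(args); err != nil { // borrows a pooled client, calls RegisterNode, returns it
		return err
	}
	// Heartbeats reuse pooled connections; each call borrows one and returns it.
	return conn.CallHeartbeatNode(args.Addr)
}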
@ -0,0 +1,112 @@

package app

import (
	"context"
	"et_rpc/pb"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/health/grpc_health_v1"
	"log"
	"time"

	pool "github.com/jolestar/go-commons-pool"
)

type MasterGRPCPoolObject struct {
	Conn   *grpc.ClientConn       // the underlying gRPC connection
	Client pb.MasterServiceClient // the gRPC client
}

type MasterGRPCClientFactory struct {
	address string
}

// NewMasterGRPCClientFactory creates a new gRPC connection factory.
func NewMasterGRPCClientFactory(address string) *MasterGRPCClientFactory {
	return &MasterGRPCClientFactory{
		address: address,
	}
}

func (f *MasterGRPCClientFactory) MakeObject(ctx context.Context) (*pool.PooledObject, error) {
	// Note: grpc.NewClient connects lazily, so this timeout does not bound dialing.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Retry policy for calls made on this connection
	serviceConfig := `{
		"methodConfig": [{
			"name": [{"service": "MasterService", "method": "*"}],
			"retryPolicy": {
				"maxAttempts": 2,
				"initialBackoff": "1s",
				"maxBackoff": "10s",
				"backoffMultiplier": 2,
				"retryableStatusCodes": ["UNAVAILABLE", "DEADLINE_EXCEEDED"]
			}
		}]
	}`

	conn, err := grpc.NewClient(
		f.address,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithDefaultServiceConfig(serviceConfig),
	)

	if err != nil {
		return nil, err // grpc.NewClient fails fast, e.g. on a malformed address
	}

	client := pb.NewMasterServiceClient(conn)
	return pool.NewPooledObject(
		&MasterGRPCPoolObject{
			Conn:   conn,
			Client: client,
		},
	), nil
}

// Destroy a pooled gRPC connection.
func (f *MasterGRPCClientFactory) DestroyObject(ctx context.Context, object *pool.PooledObject) error {
	grpcPoolObj := object.Object.(*MasterGRPCPoolObject)
	if grpcPoolObj.Conn != nil {
		// Close the client connection
		grpcPoolObj.Conn.Close()
	}
	return nil
}

// Validate a pooled gRPC connection.
func (f *MasterGRPCClientFactory) ValidateObject(ctx context.Context, object *pool.PooledObject) bool {
	grpcPoolObj := object.Object.(*MasterGRPCPoolObject)

	select {
	case <-ctx.Done():
		return false // the context was cancelled; treat the object as invalid
	default:
		// carry on with the health check
	}

	healthClient := grpc_health_v1.NewHealthClient(grpcPoolObj.Conn)
	resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{
		Service: "MasterService",
	})

	if err != nil || resp.Status != grpc_health_v1.HealthCheckResponse_SERVING {
		log.Println("ValidateObject failed:", err)
		return false
	}

	return true
}

// Activate a pooled gRPC connection.
func (f *MasterGRPCClientFactory) ActivateObject(ctx context.Context, object *pool.PooledObject) error {
	// A heartbeat request could be sent here to make sure the connection is alive
	return nil
}

// Passivate a pooled gRPC connection.
func (f *MasterGRPCClientFactory) PassivateObject(ctx context.Context, object *pool.PooledObject) error {
	// The connection could be reset here, e.g. clearing state or caches
	return nil
}
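The factory above follows go-commons-pool's PooledObjectFactory contract. A self-contained miniature of the same pattern, with a fake string "connection" standing in for the gRPC client: the pool hands back the factory-made object (not the PooledObject wrapper), so callers type-assert to the concrete type, exactly as the Call* methods do.

package main

import (
	"context"
	"fmt"

	pool "github.com/jolestar/go-commons-pool"
)

type connFactory struct{}

func (f *connFactory) MakeObject(ctx context.Context) (*pool.PooledObject, error) {
	return pool.NewPooledObject("fake-conn"), nil // a real factory would dial here
}
func (f *connFactory) DestroyObject(ctx context.Context, o *pool.PooledObject) error  { return nil }
func (f *connFactory) ValidateObject(ctx context.Context, o *pool.PooledObject) bool  { return true }
func (f *connFactory) ActivateObject(ctx context.Context, o *pool.PooledObject) error { return nil }
func (f *connFactory) PassivateObject(ctx context.Context, o *pool.PooledObject) error { return nil }

func main() {
	ctx := context.Background()
	p := pool.NewObjectPoolWithDefaultConfig(ctx, &connFactory{})
	obj, _ := p.BorrowObject(ctx) // returns the underlying object, not the wrapper
	fmt.Println(obj.(string))     // type-assert to the concrete type
	_ = p.ReturnObject(ctx, obj)
}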
@ -0,0 +1,68 @@

package app

import (
	"et_Info"
	"et_analyze"
	"et_cache"
	"et_cache/cacheSer"
	"et_calc"
	"et_calc/group"
	"et_push"
	"et_sink"
	"gitea.anxinyun.cn/container/common_models"
	"gitea.anxinyun.cn/container/common_utils"
	"gitea.anxinyun.cn/container/common_utils/configLoad"
	"gitea.anxinyun.cn/container/common_utils/dbHelper"
	"gitea.anxinyun.cn/container/common_utils/storage/storageDBs"
	"node/stages"
)

func CreateStages(inChan chan []*common_models.ProcessData, outChan chan []*common_models.ProcessData) *stages.StageManager {
	redisAddr := configLoad.LoadConfig().GetString("redis.address")
	configHelper := common_utils.NewConfigHelper(redisAddr)
	cacheServer := cacheSer.NewCacheServer(configHelper)
	esAddresses := configLoad.LoadConfig().GetStringSlice("es.addresses")
	esESHelper := dbHelper.NewESHelper(esAddresses, "", "")
	storageConsumers := storageDBs.LoadIStorageConsumer()

	// The etNode's data post-processing pipeline
	nodeStageManage := stages.NewStageManager(outChan)
	nodeStageManage.AddSource(inChan)

	// Store raw data
	sinkRawHandler := et_sink.NewSinkRawHandler(storageConsumers)
	nodeStageManage.AddStages(sinkRawHandler.GetStage())

	// Fetch station info
	infoHandler := et_Info.NewInfoHandler(configHelper)
	nodeStageManage.AddStages(infoHandler.GetStage())

	// Single-point calculation
	calcHandler := et_calc.NewCalcHandler(configHelper, cacheServer, esESHelper)
	nodeStageManage.AddStages(calcHandler.GetStage())

	// Station data caching (sliding-window filtering)
	cacheHandler := et_cache.NewCacheHandler(cacheServer)
	nodeStageManage.AddStages(cacheHandler.GetStage())

	// Station group calculation
	groupCalcHandler := group.NewGroupCalc(configHelper)
	nodeStageManage.AddStages(groupCalcHandler.GetStage())

	// Store Theme data
	sinkThemeHandler := et_sink.NewSinkThemeHandler(storageConsumers)
	nodeStageManage.AddStages(sinkThemeHandler.GetStage())

	// Station threshold analysis
	stationAnalyzeHandler := et_analyze.NewThresholdHandler()
	nodeStageManage.AddStages(stationAnalyzeHandler.GetStage())

	// Data push
	pushEnable := configLoad.LoadConfig().GetBool("push.enable")
	if pushEnable {
		publishHandler := et_push.NewPushHandler()
		nodeStageManage.AddStages(publishHandler.GetStage())
	}

	return nodeStageManage
}
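CreateStages only assembles the pipeline; it is driven elsewhere. A hedged usage sketch follows: the buffer sizes are placeholders, and RunStages is the entry point observed in NodeServiceServer.RunStageManager later in this change, so its blocking behavior is assumed.

// Hedged usage sketch for CreateStages; not part of this change.
func exampleRunPipeline() {
	in := make(chan []*common_models.ProcessData, 100)
	out := make(chan []*common_models.ProcessData, 100)
	mgr := CreateStages(in, out) // raw sink -> info -> calc -> cache -> group -> theme sink -> analyze [-> push]
	go mgr.RunStages()           // assumed to block while pumping batches through the stages
	in <- []*common_models.ProcessData{} // feed a (here empty) batch
	for batch := range out {
		_ = batch // fully processed batches emerge here
	}
}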
@ -0,0 +1,455 @@

package app

import (
	"context"
	"encoding/json"
	"et_analyze"
	"et_rpc/pb"
	"fmt"
	"gitea.anxinyun.cn/container/common_models"
	"gitea.anxinyun.cn/container/common_utils/configLoad"
	"golang.org/x/time/rate"
	"log"
	"math"
	"node/et_worker/et_recv"
	"strings"
	"sync"
	"time"
)

type UTCTime time.Time

// UnmarshalJSON implements custom date parsing.
func (ut *UTCTime) UnmarshalJSON(data []byte) error {
	// Strip the quotes from the JSON string
	str := string(data)
	if len(str) < 2 || str[0] != '"' || str[len(str)-1] != '"' {
		return fmt.Errorf("invalid time format: %s", str)
	}
	str = str[1 : len(str)-1]

	// Parse the custom date layout
	t, err := time.Parse("2006-01-02T15:04:05.999-0700", str)
	if err != nil {
		return fmt.Errorf("failed to parse time: %v", err)
	}

	// Assign the result
	*ut = UTCTime(t)
	return nil
}
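A quick, self-contained check of the layout used above against the date from the sample aggDataMsg below; both string literals come from this file.

// Verifies that the custom layout parses the sample payload's date.
func exampleParseAggDate() (time.Time, error) {
	// "2024-09-19T09:39:59.999+0000" is the date from the sample aggDataMsg.
	return time.Parse("2006-01-02T15:04:05.999-0700", "2024-09-19T09:39:59.999+0000")
}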
// aggDataMsg example: {"date":"2024-09-19T09:39:59.999+0000","sensorId":106,"structId":1,"factorId":11,"aggTypeId":2006,"aggMethodId":3004,"agg":{"strain":-19.399999618530273},"changed":{"strain":-3}}
type AggDataJson struct {
	Date        UTCTime
	SensorId    int
	StructId    int
	FactorId    int
	AggTypeId   int                // aggregation window: 10min/30min/3h/6h/12h/hourly/daily/weekly/monthly
	AggMethodId int                // aggregation method: mean/max/min
	Agg         map[string]float64 // aggregated values
	Changed     map[string]float64 // deltas
}

// NodeServiceServer implements the pb.NodeServiceServer interface.
type NodeServiceServer struct {
	pb.UnimplementedNodeServiceServer // embedded for forward compatibility

	Addr string
	Load int32

	iotaDataHandler *et_recv.RecvDataHandler
	aggDataHandler  *et_analyze.AggThresholdHandler

	iotaChannels      []chan []common_models.IotaData
	processChannels   []chan []*common_models.ProcessData
	outProcessChannel chan []*common_models.ProcessData

	nextProcessChannel int        // index of the next process channel to try
	nextChannel        int        // index of the next iota channel to try
	mu                 sync.Mutex // guards concurrent access to nextChannel
}

func NewNodeServer(processChannels []chan []*common_models.ProcessData) *NodeServiceServer {
	stageResultBufSize := configLoad.LoadConfig().GetInt("performance.node.stageResultBufSize")
	iotaBufSize := configLoad.LoadConfig().GetInt("performance.node.iotaBufSize")

	s := &NodeServiceServer{
		iotaDataHandler:   et_recv.NewRecvDataHandler(),
		aggDataHandler:    et_analyze.NewAggThresholdHandler(),
		processChannels:   processChannels,
		outProcessChannel: make(chan []*common_models.ProcessData, stageResultBufSize),
	}
	s.iotaChannels = s.NewIotaChannels(len(processChannels), iotaBufSize)

	// Start the DeviceInfo cache updater, refreshing every 10 minutes
	s.iotaDataHandler.RecvConfigHelper.StartUpdateDeviceInfo(10*time.Minute, 30)

	// Process the process-data channels
	s.RunStageManager()
	time.Sleep(500 * time.Millisecond)

	// Convert IotaData into DeviceData and forward it to s.processChannels
	go s.HandleIotaChannels()

	return s
}

func (s *NodeServiceServer) NewIotaChannels(count, bufferSize int) []chan []common_models.IotaData {
	channels := make([]chan []common_models.IotaData, count)
	for i := 0; i < count; i++ {
		channels[i] = make(chan []common_models.IotaData, bufferSize)
	}
	return channels
}

func (s *NodeServiceServer) HandleIotaChannels() {
	iotaWorkerCount := configLoad.LoadConfig().GetInt("performance.node.iotaWorkerCount")

	for index, ch := range s.iotaChannels {
		for w := 0; w < iotaWorkerCount; w++ {
			go func(c chan []common_models.IotaData, idx int) {
				s.HandleIotaChan(c, idx)
			}(ch, index) // pass index explicitly so every worker binds the right value
		}
	}
}

//func (s *NodeServiceServer) HandleProcessChannels() {
func (s *NodeServiceServer) RunStageManager() {
	for _, ch := range s.processChannels {
		go func(c chan []*common_models.ProcessData) {
			stageMgr := CreateStages(c, s.outProcessChannel)
			stageMgr.RunStages()
		}(ch)
	}
}

func (s *NodeServiceServer) sendToIotaChannels(data []common_models.IotaData) (chan []common_models.IotaData, bool) {
	startTime := time.Now()
	defer func() {
		elapsedTime := time.Since(startTime)
		log.Printf("sendToIotaChannels elapsedTime= %s\n", elapsedTime)
		//log.Printf("Final iotaData channel states: ")
		//for _, ch := range s.iotaChannels {
		//	log.Printf("iotaChan[%p]: %d/%d\n", ch, len(ch), cap(ch))
		//}
	}()

	var selectedChannel chan []common_models.IotaData
	minLength := math.MaxInt32

	// Pick the least-loaded channel
	for _, ch := range s.iotaChannels {
		if len(ch) < minLength {
			minLength = len(ch)
			selectedChannel = ch
		}
	}

	// Try to send the batch
	select {
	case selectedChannel <- data:
		return selectedChannel, true
	case <-time.After(100 * time.Millisecond): // send timeout
		log.Println("Timeout while trying to send iotaData.")
		return nil, false
	}
}
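The selection rule in sendToIotaChannels, isolated for clarity: scan once for the channel with the smallest backlog, then attempt a timed send. The helper below is a hypothetical extraction, not code from this change; it assumes the same package, where math is already imported.

// Hypothetical helper illustrating the least-loaded channel selection above.
func pickLeastLoaded[T any](chans []chan T) chan T {
	var best chan T
	minLen := math.MaxInt32
	for _, ch := range chans {
		if len(ch) < minLen { // len(ch) is the current backlog of a buffered channel
			minLen = len(ch)
			best = ch
		}
	}
	return best
}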
// HandleIotaChan converts IotaData into DeviceData and forwards the result to s.processChannels.
func (s *NodeServiceServer) HandleIotaChan(ch chan []common_models.IotaData, index int) {
	// Rate limiter: about 10 batches per second with a burst capacity of 1
	limiter := rate.NewLimiter(10, 1)

	for data := range ch {
		// Wait until the next batch may be processed
		if err := limiter.Wait(context.Background()); err != nil {
			log.Printf("rate limiter error: %v", err)
			continue
		}

		go func(iotaDataArray []common_models.IotaData) {
			dataHandleTime := time.Now() // start time for this batch
			formattedTime := dataHandleTime.Format("2006-01-02 15:04:05.999999999")
			defer func() {
				if r := recover(); r != nil {
					log.Printf("Recovered from panic: %v", r)
				}
				log.Printf("4.iotaDataArray[%v] processing took: %v", formattedTime, time.Since(dataHandleTime))
			}()

			log.Printf("1.iotaDataArray[%v] ready to process. processChannel[%p] backlog: %d/%d", formattedTime, ch, len(ch), cap(ch))
			processDataArray := make([]*common_models.ProcessData, 0, len(iotaDataArray))
			for _, r := range iotaDataArray {
				ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
				defer cancel() // note: deferred to the end of the goroutine, not per iteration

				iotaHandleTime := time.Now() // start time for this record
				deviceData, err := s.iotaDataHandler.OnDataHandler(ctx, r)
				iotaHandleElapse := time.Since(iotaHandleTime) // conversion time for this record

				if err != nil {
					log.Printf("IotaData->DeviceData[%s] conversion error: %s. Took: %v.", r.DeviceId, err.Error(), iotaHandleElapse)
					return
				}
				if deviceData == nil {
					log.Printf("IotaData->DeviceData[%s] conversion error: deviceData is nil. Took: %v.", r.DeviceId, iotaHandleElapse)
					return
				}

				// Collect the record for the processDataChannel
				pd := &common_models.ProcessData{
					DeviceData: *deviceData,
					Stations:   []common_models.Station{},
				}
				processDataArray = append(processDataArray, pd)
			}

			log.Printf("2.iotaDataArray[%v] finished IotaData->DeviceData", formattedTime)

			sendTime := time.Now()
			processChannel, ok := s.sendToProcessChannels(processDataArray, index) // blocks until capacity is available
			if !ok {
				log.Printf("3.iotaDataArray[%v] all %d s.processChannels are full; blocked.", formattedTime, len(s.processChannels))
			} else {
				log.Printf("3.iotaDataArray[%v] sent to s.processChannels. processChannel[%p] backlog: %d/%d, \nsend took: %v, batch took: %v",
					formattedTime, processChannel, len(processChannel), cap(processChannel), time.Since(sendTime), time.Since(dataHandleTime))
			}
		}(data)
	}
}
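golang.org/x/time/rate tokens refill at the limiter's rate and Wait blocks until one is available, so rate.NewLimiter(10, 1) spaces batches roughly 100ms apart. A minimal self-contained demonstration:

// Three Wait calls on a 10/sec, burst-1 limiter take about 200ms in total:
// the first token is available immediately, the next two each wait ~100ms.
func exampleLimiter() {
	lim := rate.NewLimiter(10, 1) // 10 tokens per second, burst of 1
	for i := 0; i < 3; i++ {
		_ = lim.Wait(context.Background()) // blocks until a token is available
	}
}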
func (s *NodeServiceServer) sendToProcessChannels(data []*common_models.ProcessData, index int) (chan []*common_models.ProcessData, bool) {
	startTime := time.Now()
	//timeoutDuration := configLoad.LoadConfig().GetUint16("performance.node.processTimeout") // optional 60-second send timeout

	defer func() {
		elapsedTime := time.Since(startTime)
		log.Printf("[sendToProcessChannels] elapsedTime= %s\n", elapsedTime)
		//log.Printf("[sendToProcessChannels] Final processData channel states:")
		//for _, ch := range s.processChannels {
		//	log.Printf("[sendToProcessChannels] processChan[%p]: %d/%d\n", ch, len(ch), cap(ch))
		//}
	}()

	selectedChannel := s.processChannels[index]
	log.Printf("[sendToProcessChannels] trying to send. channel[%p]: %d/%d\n", selectedChannel, len(selectedChannel), cap(selectedChannel))

	// Optional overall timeout:
	//timeout := time.After(time.Duration(timeoutDuration))

	for {
		select {
		case selectedChannel <- data:
			// Sent successfully
			log.Printf("[sendToProcessChannels] sent. channel[%p]: %d/%d\n", selectedChannel, len(selectedChannel), cap(selectedChannel))
			time.Sleep(50 * time.Millisecond)
			return selectedChannel, true
		default:
			time.Sleep(200 * time.Millisecond) // wait a while before trying again
			//case <-timeout:
			//	log.Printf("[sendToProcessChannels] send timed out after 1 minute, giving up.\n")
			//	return nil, false
		}
	}
}

var limiter = rate.NewLimiter(rate.Limit(500), 1) // admit at most 500 messages per second

func (s *NodeServiceServer) HandleIotaData(ctx context.Context, req *pb.HandleDataRequest) (*pb.HandleDataResponse, error) {
	if err := limiter.Wait(ctx); err != nil {
		return nil, fmt.Errorf("request rate too high, please retry later")
	}

	startTime := time.Now()

	// 1. Convert the messages
	conversionStart := time.Now()
	iotaDataList := s.convertMessages2IotaData(req.Messages)
	conversionDuration := time.Since(conversionStart)
	log.Printf("[INFO][HandleIotaData] 1. conversion took: %v for %d messages.", conversionDuration, len(req.Messages))

	// 2. Send to an Iota channel
	sendStart := time.Now()
	_, ok := s.sendToIotaChannels(iotaDataList)
	sendDuration := time.Since(sendStart)
	log.Printf("[INFO][HandleIotaData] 2. sendToIotaChannels took: %v.", sendDuration)

	if !ok {
		log.Printf("[WARN][HandleIotaData] 2. all %d Iota channels are full; the batch could not be sent", len(s.iotaChannels))
		// Note: the status stays SUCCESS; the problem is only surfaced via ErrorMessage.
		return &pb.HandleDataResponse{
			Addr:         s.Addr,
			Load:         s.Load,
			Status:       pb.HandleDataResponse_SUCCESS,
			ErrorMessage: "s.iotaChannels are full",
		}, nil
	}

	// 3. Total processing time
	totalDuration := time.Since(startTime)
	log.Printf("[INFO][HandleIotaData] 3. total: %v", totalDuration)

	return &pb.HandleDataResponse{
		Addr:         s.Addr,
		Load:         s.Load,
		Status:       pb.HandleDataResponse_SUCCESS,
		ErrorMessage: "",
	}, nil
}
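For reference, a client-side sketch of invoking this handler. It assumes the generated pb package follows standard protoc-gen-go-grpc naming (pb.NewNodeServiceClient) and that the caller already holds a *grpc.ClientConn from google.golang.org/grpc; neither appears verbatim in this hunk.

// Hedged client-side sketch; pb.NewNodeServiceClient is assumed from the
// generated code, and payloads are raw JSON strings as HandleDataRequest expects.
func exampleCallHandleIotaData(conn *grpc.ClientConn, payloads []string) error {
	client := pb.NewNodeServiceClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	resp, err := client.HandleIotaData(ctx, &pb.HandleDataRequest{Messages: payloads})
	if err != nil {
		return err
	}
	log.Printf("node %s load=%d status=%v err=%q", resp.Addr, resp.Load, resp.Status, resp.ErrorMessage)
	return nil
}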
// HandleAggData processes aggregated data and returns the node response.
func (s *NodeServiceServer) HandleAggData(ctx context.Context, req *pb.HandleDataRequest) (*pb.HandleDataResponse, error) {
	if err := limiter.Wait(ctx); err != nil {
		return nil, fmt.Errorf("request rate too high, please retry later")
	}

	startTime := time.Now()

	// 1. Convert the messages
	conversionStart := time.Now()
	aggDataList := s.convertMessages2AggData(req.Messages)
	conversionDuration := time.Since(conversionStart)
	log.Printf("[INFO][HandleAggData] 1. conversion took: %v for %d messages.", conversionDuration, len(req.Messages))

	// 2. Run the rate-of-change threshold analysis
	analyzeStart := time.Now()
	for _, aggData := range aggDataList {
		err := s.aggDataHandler.ProcessData(&aggData)
		if err != nil {
			errmsg := fmt.Sprintf("[etNode.AggDataHandler] 2. rate-of-change threshold analysis %s[aggTypeId:%d] ERROR: %v", aggData.R(), aggData.AggTypeId, err)
			log.Println(errmsg)
		}
		//} else {
		//	log.Printf("[etNode.AggDataHandler] rate-of-change threshold analysis SUCCESS. %s[aggTypeId:%d]changed[%v]", aggData.R(), aggData.AggTypeId, aggData.Changed)
		//}
	}
	analyzeDuration := time.Since(analyzeStart)
	log.Printf("[INFO][HandleAggData] 2. threshold analysis took: %v.", analyzeDuration)

	// 3. Total processing time
	totalDuration := time.Since(startTime)
	log.Printf("[INFO][HandleAggData] 3. total: %v", totalDuration)

	// Return the response
	return &pb.HandleDataResponse{
		Addr:         s.Addr,
		Load:         s.Load,
		Status:       pb.HandleDataResponse_SUCCESS,
		ErrorMessage: "",
	}, nil
}

// mustEmbedUnimplementedNodeServiceServer asserts interface compliance.
func (s *NodeServiceServer) mustEmbedUnimplementedNodeServiceServer() {}

// createErrorResponse builds an error response.
func (s *NodeServiceServer) createErrorResponse(status pb.HandleDataResponse_Status, message string) (*pb.HandleDataResponse, error) {
	response := &pb.HandleDataResponse{
		Addr:         s.Addr,
		Load:         s.Load,
		Status:       status,
		ErrorMessage: message,
	}
	log.Print(message) // log the error (avoids a non-constant format string)
	return response, fmt.Errorf("%s", message)
}
func (s *NodeServiceServer) convertMessages2IotaData(messages []string) []common_models.IotaData {
	st := time.Now()
	dataArray := make([]common_models.IotaData, 0, len(messages))

	// Try to parse the whole batch at once
	jsonArray := fmt.Sprintf("[%s]", strings.Join(messages, ","))
	if err := json.Unmarshal([]byte(jsonArray), &dataArray); err != nil {
		// Batch parsing failed; fall back to parsing one message at a time
		for _, msg := range messages {
			var data common_models.IotaData
			if err := json.Unmarshal([]byte(msg), &data); err != nil {
				log.Printf("per-message JSON unmarshal failed: %v", err)
				continue
			}
			dataArray = append(dataArray, data)
		}
	}

	log.Printf("[convertMessages2IotaData] parsing took: %v, decoded %d IotaData records.", time.Since(st), len(dataArray))
	return dataArray
}
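The batch-first strategy above in miniature: join N JSON objects into one array and unmarshal once, falling back to per-message parsing when any record is malformed. A self-contained sketch using a generic map payload instead of common_models.IotaData:

// Minimal sketch of the join-then-fallback parsing pattern used above.
func parseBatch(messages []string) []map[string]any {
	joined := fmt.Sprintf("[%s]", strings.Join(messages, ","))
	var out []map[string]any
	if err := json.Unmarshal([]byte(joined), &out); err == nil {
		return out // one decode for the whole batch
	}
	out = out[:0]
	for _, m := range messages {
		var one map[string]any
		if err := json.Unmarshal([]byte(m), &one); err != nil {
			continue // skip bad records, as convertMessages2IotaData does
		}
		out = append(out, one)
	}
	return out
}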
func (s *NodeServiceServer) convertMessages2AggData(messages []string) []common_models.AggData {
	//log.Printf("[convertMessages2AggData] len(messages)=%d, start ...", len(messages))
	st := time.Now()

	// Preallocate aggDatas
	aggDatas := make([]common_models.AggData, 0, len(messages))

	// Try to parse the whole batch as one JSON array
	jsonArray := fmt.Sprintf("[%s]", strings.Join(messages, ","))
	var tmpDatas []AggDataJson
	err := json.Unmarshal([]byte(jsonArray), &tmpDatas)

	if err != nil {
		log.Printf("JSON array unmarshal failed, falling back to per-message parsing: %v", err)

		// Batch parsing failed: parse each JSON string individually
		var wg sync.WaitGroup
		var mu sync.Mutex

		for _, val := range messages {
			wg.Add(1)
			go func(msg string) {
				defer wg.Done()

				var data AggDataJson
				if err := json.Unmarshal([]byte(msg), &data); err != nil {
					log.Printf("per-message JSON unmarshal failed: %v", err)
					return
				}

				// Lock to protect aggDatas
				mu.Lock()
				aggDatas = append(aggDatas, common_models.AggData{
					Date:        time.Time(data.Date),
					SensorId:    data.SensorId,
					StructId:    data.StructId,
					FactorId:    data.FactorId,
					AggTypeId:   data.AggTypeId,
					AggMethodId: data.AggMethodId,
					Agg:         data.Agg,
					Changed:     data.Changed,
					ThingId:     "",
				})
				mu.Unlock()
			}(val)
		}

		// Wait for all goroutines to finish
		wg.Wait()
	} else {
		// Batch parsing succeeded: convert directly
		aggDatas = make([]common_models.AggData, len(tmpDatas))
		for i, data := range tmpDatas {
			aggDatas[i] = common_models.AggData{
				Date:        time.Time(data.Date),
				SensorId:    data.SensorId,
				StructId:    data.StructId,
				FactorId:    data.FactorId,
				AggTypeId:   data.AggTypeId,
				AggMethodId: data.AggMethodId,
				Agg:         data.Agg,
				Changed:     data.Changed,
				ThingId:     "",
			}
		}
	}

	log.Printf("[convertMessages2AggData] parsing took: %v, decoded %d AggData records.", time.Since(st), len(aggDatas))
	return aggDatas
}