Skip to content

Commit a503f25

Browse files
committed
fix
1 parent 7db7f16 commit a503f25

1 file changed

Lines changed: 31 additions & 21 deletions

File tree

  • Cyaim.WebSocketServer/Cyaim.WebSocketServer/Infrastructure/Cluster

Cyaim.WebSocketServer/Cyaim.WebSocketServer/Infrastructure/Cluster/RaftNode.cs

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,11 @@ public class RaftNode
6565
/// <summary>
6666
/// Minimum election timeout in milliseconds / 最小选举超时(毫秒)
6767
/// </summary>
68-
private readonly int _electionTimeoutMin = 150;
68+
private readonly int _electionTimeoutMin = 1500;
6969
/// <summary>
7070
/// Maximum election timeout in milliseconds / 最大选举超时(毫秒)
7171
/// </summary>
72-
private readonly int _electionTimeoutMax = 300;
72+
private readonly int _electionTimeoutMax = 3000;
7373
/// <summary>
7474
/// Heartbeat interval in milliseconds / 心跳间隔(毫秒)
7575
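/// Defaults to 1000 ms so that overly frequent heartbeats do not put excessive pressure on logging and the MQ / 默认 1000ms,避免过于频繁的心跳导致日志和 MQ 压力过大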
@@ -180,6 +180,8 @@ private void BecomeCandidate()
180180
CurrentTerm++;
181181
State = RaftNodeState.Candidate;
182182
VotedFor = _nodeId;
183+
// Update the heartbeat time so the election timer is measured from the current moment / 更新心跳时间,确保选举定时器从当前时间开始计算
184+
_lastHeartbeatTime = DateTime.UtcNow;
183185
_logger.LogInformation($"Node {_nodeId} became Candidate in term {CurrentTerm}");
184186

185187
// Start election / 开始选举
@@ -309,21 +311,21 @@ private async Task StartElectionAsync()
309311
if (isTwoNodeCluster)
310312
{
311313
_logger.LogInformation($"Two-node cluster detected. Using network quality-based leader selection.");
312-
314+
313315
// Add a small random delay to avoid simultaneous elections / 添加小的随机延迟以避免同时选举
314316
var randomDelay = new Random().Next(100, 300);
315317
await Task.Delay(randomDelay);
316-
318+
317319
// Re-check state in case we received a heartbeat from the other node / 重新检查状态,以防我们从另一个节点收到心跳
318320
if (State != RaftNodeState.Candidate)
319321
{
320322
_logger.LogInformation($"Node {_nodeId} state changed during delay, aborting leader selection");
321323
return;
322324
}
323-
325+
324326
// For 2-node clusters, select leader based on network quality / 对于2节点集群,基于网络质量选择Leader
325327
var shouldBecomeLeader = await ShouldBecomeLeaderBasedOnNetworkQuality(knownNodes);
326-
328+
327329
if (shouldBecomeLeader)
328330
{
329331
_logger.LogInformation($"Node {_nodeId} selected as leader based on network quality in 2-node cluster");
@@ -450,14 +452,22 @@ private void ResetElectionTimer()
450452
return; // Leaders don't need election timer / 领导者不需要选举定时器
451453
}
452454

453-
var timeout = new Random().Next(_electionTimeoutMin, _electionTimeoutMax);
455+
// 如果是 Candidate 状态,使用更长的超时时间,确保选举有足够时间完成
456+
// If in Candidate state, use longer timeout to ensure election has enough time to complete
457+
var timeout = State == RaftNodeState.Candidate
458+
? new Random().Next(_electionTimeoutMax, _electionTimeoutMax * 2)
459+
: new Random().Next(_electionTimeoutMin, _electionTimeoutMax);
460+
454461
_electionTimer = new Timer(async _ =>
455462
{
456-
if (State != RaftNodeState.Leader &&
457-
(DateTime.UtcNow - _lastHeartbeatTime).TotalMilliseconds > timeout)
463+
lock (_stateLock)
458464
{
459-
_logger.LogInformation($"Election timeout reached for node {_nodeId}");
460-
BecomeCandidate();
465+
if (State != RaftNodeState.Leader &&
466+
(DateTime.UtcNow - _lastHeartbeatTime).TotalMilliseconds > timeout)
467+
{
468+
_logger.LogInformation($"Election timeout reached for node {_nodeId} (State: {State}, Timeout: {timeout}ms)");
469+
BecomeCandidate();
470+
}
461471
}
462472
}, null, timeout, Timeout.Infinite);
463473
}
@@ -470,7 +480,7 @@ private void ResetElectionTimer()
470480
private void OnMessageReceived(object sender, ClusterMessageEventArgs e)
471481
{
472482
_logger.LogWarning($"[RaftNode] OnMessageReceived 被调用 - NodeId: {_nodeId}, MessageType: {e.Message.Type}, FromNodeId: {e.Message.FromNodeId}, ToNodeId: {e.Message.ToNodeId}, MessageId: {e.Message.MessageId}");
473-
483+
474484
_lastHeartbeatTime = DateTime.UtcNow;
475485

476486
switch (e.Message.Type)
@@ -502,7 +512,7 @@ private void HandleRequestVote(ClusterMessage message)
502512
try
503513
{
504514
_logger.LogWarning($"[RaftNode] HandleRequestVote 开始处理 - NodeId: {_nodeId}, FromNodeId: {message.FromNodeId}, PayloadLength: {message.Payload?.Length ?? 0}");
505-
515+
506516
var request = System.Text.Json.JsonSerializer.Deserialize<RequestVoteMessage>(message.Payload);
507517
_logger.LogWarning($"[RaftNode] RequestVote 解析成功 - NodeId: {_nodeId}, RequestTerm: {request.Term}, CandidateId: {request.CandidateId}, CurrentTerm: {CurrentTerm}, VotedFor: {VotedFor}");
508518

@@ -803,11 +813,11 @@ private List<string> GetKnownNodeIds()
803813
if (transportType.Name == "HybridClusterTransport" || transportType.FullName?.Contains("HybridClusterTransport") == true)
804814
{
805815
_logger.LogWarning($"[RaftNode] Transport is HybridClusterTransport, attempting to get known nodes");
806-
816+
807817
// First try public method GetKnownNodeIds / 首先尝试公共方法 GetKnownNodeIds
808-
var getKnownNodeIdsMethod = transportType.GetMethod("GetKnownNodeIds",
818+
var getKnownNodeIdsMethod = transportType.GetMethod("GetKnownNodeIds",
809819
System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.Instance);
810-
820+
811821
if (getKnownNodeIdsMethod != null)
812822
{
813823
var result = getKnownNodeIdsMethod.Invoke(_transport, null);
@@ -823,14 +833,14 @@ private List<string> GetKnownNodeIds()
823833
}
824834
}
825835
}
826-
836+
827837
// Fallback to reflection if method not found / 如果方法未找到,回退到反射
828838
if (nodeIds.Count == 0)
829839
{
830840
_logger.LogWarning($"[RaftNode] GetKnownNodeIds() returned no nodes, trying reflection");
831-
var knownNodesField = transportType.GetField("_knownNodes",
841+
var knownNodesField = transportType.GetField("_knownNodes",
832842
System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
833-
843+
834844
if (knownNodesField != null)
835845
{
836846
var knownNodesDict = knownNodesField.GetValue(_transport);
@@ -848,7 +858,7 @@ private List<string> GetKnownNodeIds()
848858
}
849859
}
850860
}
851-
861+
852862
// If no nodes found via reflection, fall back to cluster configuration
853863
// 如果通过反射没有找到节点,回退到集群配置
854864
if (nodeIds.Count == 0)
@@ -953,7 +963,7 @@ private async Task<bool> ShouldBecomeLeaderBasedOnNetworkQuality(List<string> kn
953963
// Also consider: if we can't measure the other node's quality to us,
954964
// we'll use a tie-breaker (node ID comparison)
955965
// 同时考虑:如果我们无法测量另一个节点到我们的质量,我们将使用平局决胜(节点ID比较)
956-
966+
957967
if (myTotalQuality > otherQuality)
958968
{
959969
_logger.LogInformation($"Node {_nodeId} has better network quality ({myTotalQuality} vs {otherQuality}), becoming leader");

0 commit comments

Comments
 (0)