@@ -65,11 +65,11 @@ public class RaftNode
6565 /// <summary>
6666 /// Minimum election timeout in milliseconds / 最小选举超时(毫秒)
6767 /// </summary>
68- private readonly int _electionTimeoutMin = 150 ;
68+ private readonly int _electionTimeoutMin = 1500 ;
6969 /// <summary>
7070 /// Maximum election timeout in milliseconds / 最大选举超时(毫秒)
7171 /// </summary>
72- private readonly int _electionTimeoutMax = 300 ;
72+ private readonly int _electionTimeoutMax = 3000 ;
7373 /// <summary>
7474 /// Heartbeat interval in milliseconds / 心跳间隔(毫秒)
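7575 /// Defaults to 1000 ms so that overly frequent heartbeats do not put excessive pressure on logging and the MQ / 默认 1000ms,避免过于频繁的心跳导致日志和 MQ 压力过大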
@@ -180,6 +180,8 @@ private void BecomeCandidate()
180180 CurrentTerm ++ ;
181181 State = RaftNodeState . Candidate ;
182182 VotedFor = _nodeId ;
183+ // Refresh the heartbeat timestamp so the election timer measures elapsed time from now / 更新心跳时间,确保选举定时器从当前时间开始计算
184+ _lastHeartbeatTime = DateTime . UtcNow ;
183185 _logger . LogInformation ( $ "Node { _nodeId } became Candidate in term { CurrentTerm } ") ;
184186
185187 // Start election / 开始选举
@@ -309,21 +311,21 @@ private async Task StartElectionAsync()
309311 if ( isTwoNodeCluster )
310312 {
311313 _logger . LogInformation ( $ "Two-node cluster detected. Using network quality-based leader selection.") ;
312-
314+
313315 // Add a small random delay to avoid simultaneous elections / 添加小的随机延迟以避免同时选举
314316 var randomDelay = new Random ( ) . Next ( 100 , 300 ) ;
315317 await Task . Delay ( randomDelay ) ;
316-
318+
317319 // Re-check state in case we received a heartbeat from the other node / 重新检查状态,以防我们从另一个节点收到心跳
318320 if ( State != RaftNodeState . Candidate )
319321 {
320322 _logger . LogInformation ( $ "Node { _nodeId } state changed during delay, aborting leader selection") ;
321323 return ;
322324 }
323-
325+
324326 // For 2-node clusters, select leader based on network quality / 对于2节点集群,基于网络质量选择Leader
325327 var shouldBecomeLeader = await ShouldBecomeLeaderBasedOnNetworkQuality ( knownNodes ) ;
326-
328+
327329 if ( shouldBecomeLeader )
328330 {
329331 _logger . LogInformation ( $ "Node { _nodeId } selected as leader based on network quality in 2-node cluster") ;
@@ -450,14 +452,22 @@ private void ResetElectionTimer()
450452 return ; // Leaders don't need election timer / 领导者不需要选举定时器
451453 }
452454
453- var timeout = new Random ( ) . Next ( _electionTimeoutMin , _electionTimeoutMax ) ;
455+ // 如果是 Candidate 状态,使用更长的超时时间,确保选举有足够时间完成
456+ // If in Candidate state, use longer timeout to ensure election has enough time to complete
457+ var timeout = State == RaftNodeState . Candidate
458+ ? new Random ( ) . Next ( _electionTimeoutMax , _electionTimeoutMax * 2 )
459+ : new Random ( ) . Next ( _electionTimeoutMin , _electionTimeoutMax ) ;
460+
454461 _electionTimer = new Timer ( async _ =>
455462 {
456- if ( State != RaftNodeState . Leader &&
457- ( DateTime . UtcNow - _lastHeartbeatTime ) . TotalMilliseconds > timeout )
463+ lock ( _stateLock )
458464 {
459- _logger . LogInformation ( $ "Election timeout reached for node { _nodeId } ") ;
460- BecomeCandidate ( ) ;
465+ if ( State != RaftNodeState . Leader &&
466+ ( DateTime . UtcNow - _lastHeartbeatTime ) . TotalMilliseconds > timeout )
467+ {
468+ _logger . LogInformation ( $ "Election timeout reached for node { _nodeId } (State: { State } , Timeout: { timeout } ms)") ;
469+ BecomeCandidate ( ) ;
470+ }
461471 }
462472 } , null , timeout , Timeout . Infinite ) ;
463473 }
@@ -470,7 +480,7 @@ private void ResetElectionTimer()
470480 private void OnMessageReceived ( object sender , ClusterMessageEventArgs e )
471481 {
472482 _logger . LogWarning ( $ "[RaftNode] OnMessageReceived 被调用 - NodeId: { _nodeId } , MessageType: { e . Message . Type } , FromNodeId: { e . Message . FromNodeId } , ToNodeId: { e . Message . ToNodeId } , MessageId: { e . Message . MessageId } ") ;
473-
483+
474484 _lastHeartbeatTime = DateTime . UtcNow ;
475485
476486 switch ( e . Message . Type )
@@ -502,7 +512,7 @@ private void HandleRequestVote(ClusterMessage message)
502512 try
503513 {
504514 _logger . LogWarning ( $ "[RaftNode] HandleRequestVote 开始处理 - NodeId: { _nodeId } , FromNodeId: { message . FromNodeId } , PayloadLength: { message . Payload ? . Length ?? 0 } ") ;
505-
515+
506516 var request = System . Text . Json . JsonSerializer . Deserialize < RequestVoteMessage > ( message . Payload ) ;
507517 _logger . LogWarning ( $ "[RaftNode] RequestVote 解析成功 - NodeId: { _nodeId } , RequestTerm: { request . Term } , CandidateId: { request . CandidateId } , CurrentTerm: { CurrentTerm } , VotedFor: { VotedFor } ") ;
508518
@@ -803,11 +813,11 @@ private List<string> GetKnownNodeIds()
803813 if ( transportType . Name == "HybridClusterTransport" || transportType . FullName ? . Contains ( "HybridClusterTransport" ) == true )
804814 {
805815 _logger . LogWarning ( $ "[RaftNode] Transport is HybridClusterTransport, attempting to get known nodes") ;
806-
816+
807817 // First try public method GetKnownNodeIds / 首先尝试公共方法 GetKnownNodeIds
808- var getKnownNodeIdsMethod = transportType . GetMethod ( "GetKnownNodeIds" ,
818+ var getKnownNodeIdsMethod = transportType . GetMethod ( "GetKnownNodeIds" ,
809819 System . Reflection . BindingFlags . Public | System . Reflection . BindingFlags . Instance ) ;
810-
820+
811821 if ( getKnownNodeIdsMethod != null )
812822 {
813823 var result = getKnownNodeIdsMethod . Invoke ( _transport , null ) ;
@@ -823,14 +833,14 @@ private List<string> GetKnownNodeIds()
823833 }
824834 }
825835 }
826-
836+
827837 // Fallback to reflection if method not found / 如果方法未找到,回退到反射
828838 if ( nodeIds . Count == 0 )
829839 {
830840 _logger . LogWarning ( $ "[RaftNode] GetKnownNodeIds() returned no nodes, trying reflection") ;
831- var knownNodesField = transportType . GetField ( "_knownNodes" ,
841+ var knownNodesField = transportType . GetField ( "_knownNodes" ,
832842 System . Reflection . BindingFlags . NonPublic | System . Reflection . BindingFlags . Instance ) ;
833-
843+
834844 if ( knownNodesField != null )
835845 {
836846 var knownNodesDict = knownNodesField . GetValue ( _transport ) ;
@@ -848,7 +858,7 @@ private List<string> GetKnownNodeIds()
848858 }
849859 }
850860 }
851-
861+
852862 // If no nodes found via reflection, fall back to cluster configuration
853863 // 如果通过反射没有找到节点,回退到集群配置
854864 if ( nodeIds . Count == 0 )
@@ -953,7 +963,7 @@ private async Task<bool> ShouldBecomeLeaderBasedOnNetworkQuality(List<string> kn
953963 // Also consider: if we can't measure the other node's quality to us,
954964 // we'll use a tie-breaker (node ID comparison)
955965 // 同时考虑:如果我们无法测量另一个节点到我们的质量,我们将使用平局决胜(节点ID比较)
956-
966+
957967 if ( myTotalQuality > otherQuality )
958968 {
959969 _logger . LogInformation ( $ "Node { _nodeId } has better network quality ({ myTotalQuality } vs { otherQuality } ), becoming leader") ;
0 commit comments