|
42 | 42 | import com.alipay.sofa.jraft.JRaftUtils; |
43 | 43 | import com.alipay.sofa.jraft.Node; |
44 | 44 | import com.alipay.sofa.jraft.RaftGroupService; |
45 | | -import com.alipay.sofa.jraft.ReplicatorGroup; |
46 | 45 | import com.alipay.sofa.jraft.Status; |
47 | 46 | import com.alipay.sofa.jraft.conf.Configuration; |
48 | 47 | import com.alipay.sofa.jraft.core.Replicator; |
49 | 48 | import com.alipay.sofa.jraft.entity.PeerId; |
50 | 49 | import com.alipay.sofa.jraft.entity.Task; |
51 | 50 | import com.alipay.sofa.jraft.error.RaftError; |
52 | 51 | import com.alipay.sofa.jraft.option.NodeOptions; |
53 | | -import com.alipay.sofa.jraft.option.RaftOptions; |
54 | 52 | import com.alipay.sofa.jraft.option.RpcOptions; |
55 | 53 | import com.alipay.sofa.jraft.rpc.RaftRpcServerFactory; |
56 | 54 | import com.alipay.sofa.jraft.rpc.RpcServer; |
57 | 55 | import com.alipay.sofa.jraft.rpc.impl.BoltRpcServer; |
58 | 56 | import com.alipay.sofa.jraft.util.Endpoint; |
59 | | -import com.alipay.sofa.jraft.util.ThreadId; |
60 | 57 | import com.alipay.sofa.jraft.util.internal.ThrowUtil; |
61 | 58 |
|
62 | 59 | import io.netty.channel.ChannelHandler; |
@@ -88,8 +85,12 @@ public synchronized boolean init(PDConfig.Raft config) { |
88 | 85 | } |
89 | 86 | this.config = config; |
90 | 87 |
|
| 88 | + // Wire configured rpc timeout into RaftRpcClient so the Bolt transport |
| 89 | + // timeout and the future.get() caller timeout in getLeaderGrpcAddress() are consistent. |
91 | 90 | raftRpcClient = new RaftRpcClient(); |
92 | | - raftRpcClient.init(new RpcOptions()); |
| 91 | + RpcOptions rpcOptions = new RpcOptions(); |
| 92 | + rpcOptions.setRpcDefaultTimeout(config.getRpcTimeout()); |
| 93 | + raftRpcClient.init(rpcOptions); |
93 | 94 |
|
94 | 95 | String raftPath = config.getDataPath() + "/" + groupId; |
95 | 96 | new File(raftPath).mkdirs(); |
@@ -121,8 +122,7 @@ public synchronized boolean init(PDConfig.Raft config) { |
121 | 122 | nodeOptions.setRpcConnectTimeoutMs(config.getRpcTimeout()); |
122 | 123 | nodeOptions.setRpcDefaultTimeout(config.getRpcTimeout()); |
123 | 124 | nodeOptions.setRpcInstallSnapshotTimeout(config.getRpcTimeout()); |
124 | | - // Set the raft configuration |
125 | | - RaftOptions raftOptions = nodeOptions.getRaftOptions(); |
| 125 | + // TODO: tune RaftOptions for PD (see hugegraph-store PartitionEngine for reference) |
126 | 126 |
|
127 | 127 | nodeOptions.setEnableMetrics(true); |
128 | 128 |
|
@@ -230,31 +230,42 @@ public PeerId getLeader() { |
230 | 230 | } |
231 | 231 |
|
232 | 232 | /** |
233 | | - * Send a message to the leader to get the grpc address; |
| 233 | + * Send a message to the leader to get the grpc address. |
234 | 234 | */ |
235 | 235 | public String getLeaderGrpcAddress() throws ExecutionException, InterruptedException { |
236 | 236 | if (isLeader()) { |
237 | 237 | return config.getGrpcAddress(); |
238 | 238 | } |
239 | 239 |
|
240 | 240 | if (raftNode.getLeaderId() == null) { |
241 | | - waitingForLeader(10000); |
| 241 | + waitingForLeader(config.getRpcTimeout()); |
| 242 | + } |
| 243 | + |
| 244 | + // Cache leader to avoid repeated getLeaderId() calls and guard against |
| 245 | + // waitingForLeader() returning without a leader being elected. |
| 246 | + PeerId leader = raftNode.getLeaderId(); |
| 247 | + if (leader == null) { |
| 248 | + throw new ExecutionException(new IllegalStateException("Leader is not ready")); |
242 | 249 | } |
243 | 250 |
|
244 | 251 | try { |
245 | 252 | RaftRpcProcessor.GetMemberResponse response = raftRpcClient |
246 | | - .getGrpcAddress(raftNode.getLeaderId().getEndpoint().toString()) |
| 253 | + .getGrpcAddress(leader.getEndpoint().toString()) |
247 | 254 | .get(config.getRpcTimeout(), TimeUnit.MILLISECONDS); |
248 | 255 | if (response != null && response.getGrpcAddress() != null) { |
249 | 256 | return response.getGrpcAddress(); |
250 | 257 | } |
251 | 258 | } catch (TimeoutException | ExecutionException e) { |
252 | | - log.warn("Failed to get leader gRPC address via RPC, falling back to endpoint derivation", e); |
| 259 | + // TODO: a more complete fix would need a source of truth for the leader's |
| 260 | + // actual grpcAddress rather than deriving it from the local node's port config. |
| 261 | + throw new ExecutionException( |
| 262 | + String.format("Failed to resolve leader gRPC address for %s", leader), e); |
253 | 263 | } |
254 | 264 |
|
255 | | - // Fallback: derive from raft endpoint IP + local gRPC port (best effort) |
256 | | - String leaderIp = raftNode.getLeaderId().getEndpoint().getIp(); |
257 | | - return leaderIp + ":" + config.getGrpcPort(); |
| 265 | + log.warn("Leader gRPC address field is null in RPC response for {}", leader); |
| 266 | + throw new ExecutionException( |
| 267 | + new IllegalStateException( |
| 268 | + String.format("Leader gRPC address unavailable for %s", leader))); |
258 | 269 | } |
259 | 270 |
|
260 | 271 | /** |
@@ -380,7 +391,8 @@ private boolean peerEquals(PeerId p1, PeerId p2) { |
380 | 391 | if (p1 == null || p2 == null) { |
381 | 392 | return false; |
382 | 393 | } |
383 | | - return Objects.equals(p1.getIp(), p2.getIp()) && Objects.equals(p1.getPort(), p2.getPort()); |
| 394 | + return Objects.equals(p1.getIp(), p2.getIp()) && |
| 395 | + Objects.equals(p1.getPort(), p2.getPort()); |
384 | 396 | } |
385 | 397 |
|
386 | 398 | private Replicator.State getReplicatorState(PeerId peerId) { |
|
0 commit comments