Skip to content

Commit 1fc4fa2

Browse files
committed
rxrpc: Fix congestion management
rxrpc has a problem in its congestion management in that it saves the congestion window size (cwnd) from one call to another, but if this is 0 at the time it is saved, then the next call may not actually manage to ever transmit anything. To this end: (1) Don't save cwnd between calls, but rather reset back down to the initial cwnd and re-enter slow-start if data transmission is idle for more than an RTT. (2) Preserve ssthresh instead, as that is a handy estimate of pipe capacity. Knowing roughly when to stop slow start and enter congestion avoidance can reduce the tendency to overshoot and drop larger amounts of packets when probing. In future, cwnd growth also needs to be constrained when the window isn't being filled due to being application limited. Reported-by: Simon Wilkinson <sxw@auristor.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org
1 parent 6869ddb commit 1fc4fa2

11 files changed

Lines changed: 60 additions & 20 deletions

File tree

include/trace/events/rxrpc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@
193193
EM(rxrpc_cong_new_low_nack, " NewLowN") \
194194
EM(rxrpc_cong_no_change, " -") \
195195
EM(rxrpc_cong_progress, " Progres") \
196+
EM(rxrpc_cong_idle_reset, " IdleRes") \
196197
EM(rxrpc_cong_retransmit_again, " ReTxAgn") \
197198
EM(rxrpc_cong_rtt_window_end, " RttWinE") \
198199
E_(rxrpc_cong_saw_nack, " SawNack")

net/rxrpc/ar-internal.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ struct rxrpc_peer {
332332
u32 rto_j; /* Retransmission timeout in jiffies */
333333
u8 backoff; /* Backoff timeout */
334334

335-
u8 cong_cwnd; /* Congestion window size */
335+
u8 cong_ssthresh; /* Congestion slow-start threshold */
336336
};
337337

338338
/*
@@ -626,6 +626,7 @@ struct rxrpc_call {
626626
u16 tx_backoff; /* Delay to insert due to Tx failure */
627627
u8 tx_winsize; /* Maximum size of Tx window */
628628
#define RXRPC_TX_MAX_WINDOW 128
629+
ktime_t tx_last_sent; /* Last time a transmission occurred */
629630

630631
/* Received data tracking */
631632
struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */
@@ -687,10 +688,10 @@ struct rxrpc_call {
687688
* Summary of a new ACK and the changes it made to the Tx buffer packet states.
688689
*/
689690
struct rxrpc_ack_summary {
691+
u16 nr_acks; /* Number of ACKs in packet */
692+
u16 nr_new_acks; /* Number of new ACKs in packet */
693+
u16 nr_rot_new_acks; /* Number of rotated new ACKs */
690694
u8 ack_reason;
691-
u8 nr_acks; /* Number of ACKs in packet */
692-
u8 nr_new_acks; /* Number of new ACKs in packet */
693-
u8 nr_rot_new_acks; /* Number of rotated new ACKs */
694695
bool saw_nacks; /* Saw NACKs in packet */
695696
bool new_low_nack; /* T if new low NACK found */
696697
bool retrans_timeo; /* T if reTx due to timeout happened */

net/rxrpc/call_accept.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
324324
call->security = conn->security;
325325
call->security_ix = conn->security_ix;
326326
call->peer = rxrpc_get_peer(conn->params.peer);
327-
call->cong_cwnd = call->peer->cong_cwnd;
327+
call->cong_ssthresh = call->peer->cong_ssthresh;
328+
call->tx_last_sent = ktime_get_real();
328329
return call;
329330
}
330331

net/rxrpc/call_object.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,12 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
166166
call->rx_winsize = rxrpc_rx_window_size;
167167
call->tx_winsize = 16;
168168

169-
call->cong_cwnd = 2;
169+
if (RXRPC_TX_SMSS > 2190)
170+
call->cong_cwnd = 2;
171+
else if (RXRPC_TX_SMSS > 1095)
172+
call->cong_cwnd = 3;
173+
else
174+
call->cong_cwnd = 4;
170175
call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
171176

172177
call->rxnet = rxnet;

net/rxrpc/conn_client.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,8 @@ static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx,
363363
if (!cp->peer)
364364
goto error;
365365

366-
call->cong_cwnd = cp->peer->cong_cwnd;
366+
call->tx_last_sent = ktime_get_real();
367+
call->cong_ssthresh = cp->peer->cong_ssthresh;
367368
if (call->cong_cwnd >= call->cong_ssthresh)
368369
call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
369370
else

net/rxrpc/conn_object.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
207207
{
208208
struct rxrpc_connection *conn = call->conn;
209209

210-
call->peer->cong_cwnd = call->cong_cwnd;
210+
call->peer->cong_ssthresh = call->cong_ssthresh;
211211

212212
if (!hlist_unhashed(&call->error_link)) {
213213
spin_lock_bh(&call->peer->lock);

net/rxrpc/input.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,25 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
5858
summary->cumulative_acks = cumulative_acks;
5959
summary->dup_acks = call->cong_dup_acks;
6060

61+
/* If we haven't transmitted anything for >1RTT, we should reset the
62+
* congestion management state.
63+
*/
64+
if ((call->cong_mode == RXRPC_CALL_SLOW_START ||
65+
call->cong_mode == RXRPC_CALL_CONGEST_AVOIDANCE) &&
66+
ktime_before(ktime_add_us(call->tx_last_sent,
67+
call->peer->srtt_us >> 3),
68+
ktime_get_real())
69+
) {
70+
change = rxrpc_cong_idle_reset;
71+
summary->mode = RXRPC_CALL_SLOW_START;
72+
if (RXRPC_TX_SMSS > 2190)
73+
summary->cwnd = 2;
74+
else if (RXRPC_TX_SMSS > 1095)
75+
summary->cwnd = 3;
76+
else
77+
summary->cwnd = 4;
78+
}
79+
6180
switch (call->cong_mode) {
6281
case RXRPC_CALL_SLOW_START:
6382
if (summary->saw_nacks)
@@ -205,7 +224,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
205224

206225
if (call->acks_lowest_nak == call->acks_hard_ack) {
207226
call->acks_lowest_nak = to;
208-
} else if (before_eq(call->acks_lowest_nak, to)) {
227+
} else if (after(to, call->acks_lowest_nak)) {
209228
summary->new_low_nack = true;
210229
call->acks_lowest_nak = to;
211230
}

net/rxrpc/output.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
501501

502502
done:
503503
if (ret >= 0) {
504+
call->tx_last_sent = txb->last_sent;
504505
if (txb->wire.flags & RXRPC_REQUEST_ACK) {
505506
call->peer->rtt_last_req = txb->last_sent;
506507
if (call->peer->rtt_count > 1) {

net/rxrpc/peer_object.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
227227

228228
rxrpc_peer_init_rtt(peer);
229229

230-
if (RXRPC_TX_SMSS > 2190)
231-
peer->cong_cwnd = 2;
232-
else if (RXRPC_TX_SMSS > 1095)
233-
peer->cong_cwnd = 3;
234-
else
235-
peer->cong_cwnd = 4;
230+
peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
236231
trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here);
237232
}
238233

net/rxrpc/proc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
217217
seq_puts(seq,
218218
"Proto Local "
219219
" Remote "
220-
" Use CW MTU LastUse RTT RTO\n"
220+
" Use SST MTU LastUse RTT RTO\n"
221221
);
222222
return 0;
223223
}
@@ -235,7 +235,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
235235
lbuff,
236236
rbuff,
237237
refcount_read(&peer->ref),
238-
peer->cong_cwnd,
238+
peer->cong_ssthresh,
239239
peer->mtu,
240240
now - peer->last_tx_at,
241241
peer->srtt_us >> 3,

0 commit comments

Comments
 (0)