Skip to content

Commit 138a909

Browse files
RUBY-3750 Clarify NoWritesPerformed error label (#3016)
1 parent 349d211 commit 138a909

2 files changed

Lines changed: 206 additions & 10 deletions

File tree

lib/mongo/retryable/write_worker.rb

Lines changed: 23 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ def nro_write_with_retry(_write_concern, context:, &block)
109109

110110
if options[:retry_writes]
111111
error_count = 0
112+
error_to_raise = nil
112113
begin
113114
result = server.with_connection(connection_global_id: context.connection_global_id) do |connection|
114115
yield connection, nil, context
@@ -120,8 +121,12 @@ def nro_write_with_retry(_write_concern, context:, &block)
120121
rescue *retryable_exceptions, Error::PoolError, Error::OperationFailure::Family => e
121122
if retryable_overload_error?(e)
122123
error_count += 1
124+
error_to_raise ||= e
125+
unless e.respond_to?(:label?) && e.label?('NoWritesPerformed')
126+
error_to_raise = e
127+
end
123128
delay = retry_policy.backoff_delay(error_count)
124-
raise e unless retry_policy.should_retry_overload?(error_count, delay, context: context)
129+
raise error_to_raise unless retry_policy.should_retry_overload?(error_count, delay, context: context)
125130

126131
log_retry(e, message: 'Write retry (overload backoff)')
127132
sleep(delay)
@@ -131,8 +136,8 @@ def nro_write_with_retry(_write_concern, context:, &block)
131136
error: e, timeout: context.remaining_timeout_sec
132137
)
133138
rescue Error, Error::AuthError => select_err
134-
e.add_note("later retry failed: #{select_err.class}: #{select_err}")
135-
raise e
139+
error_to_raise.add_note("later retry failed: #{select_err.class}: #{select_err}")
140+
raise error_to_raise
136141
end
137142
retry
138143
else
@@ -375,9 +380,14 @@ def retry_write(original_error, txn_num, context:, failed_server: nil, &block)
375380
# Retry loop for overload write errors with exponential backoff.
376381
def overload_write_retry(last_error, session, txn_num, context:, failed_server:, error_count:,
377382
was_starting_transaction: false)
383+
# Track the error to return per the NoWritesPerformed spec rules:
384+
# - first error is always saved
385+
# - only update when a new error does NOT have NoWritesPerformed
386+
error_to_raise = last_error
387+
378388
loop do
379389
delay = retry_policy.backoff_delay(error_count)
380-
raise last_error unless retry_policy.should_retry_overload?(error_count, delay, context: context)
390+
raise error_to_raise unless retry_policy.should_retry_overload?(error_count, delay, context: context)
381391

382392
log_retry(last_error, message: 'Write retry (overload backoff)')
383393
sleep(delay)
@@ -389,13 +399,13 @@ def overload_write_retry(last_error, session, txn_num, context:, failed_server:,
389399
timeout: context.remaining_timeout_sec
390400
)
391401
rescue Error, Error::AuthError => e
392-
last_error.add_note("later retry failed: #{e.class}: #{e}")
393-
raise last_error
402+
error_to_raise.add_note("later retry failed: #{e.class}: #{e}")
403+
raise error_to_raise
394404
end
395405

396406
unless server.retry_writes?
397-
last_error.add_note('did not retry because server does not support retryable writes')
398-
raise last_error
407+
error_to_raise.add_note('did not retry because server does not support retryable writes')
408+
raise error_to_raise
399409
end
400410

401411
begin
@@ -417,13 +427,16 @@ def overload_write_retry(last_error, session, txn_num, context:, failed_server:,
417427
else
418428
raise e unless is_overload || e.write_retryable?
419429
end
430+
unless e.respond_to?(:label?) && e.label?('NoWritesPerformed')
431+
error_to_raise = e
432+
end
420433
retry_policy.record_non_overload_retry_failure unless is_overload
421434
context = context.with(overload_only_retry: false) unless is_overload
422435
failed_server = server
423436
last_error = e
424437
rescue Error, Error::AuthError => e
425-
last_error.add_note("later retry failed: #{e.class}: #{e}")
426-
raise last_error
438+
error_to_raise.add_note("later retry failed: #{e.class}: #{e}")
439+
raise error_to_raise
427440
end
428441
end
429442
end
Lines changed: 183 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,183 @@
1+
# frozen_string_literal: true
2+
3+
require 'spec_helper'
4+
5+
# Retryable Writes Prose Test 6: Test error propagation after
6+
# encountering multiple errors.
7+
#
8+
# Spec reference:
9+
# specifications/source/retryable-writes/tests/README.md
10+
# "6. Test error propagation after encountering multiple errors."
11+
describe 'Retryable writes prose test 6: error propagation' do
12+
require_topology :replica_set
13+
min_server_version '6.0'
14+
15+
let(:client) do
16+
authorized_client.with(retry_writes: true)
17+
end
18+
19+
let(:admin_client) { client.use(:admin) }
20+
21+
let(:collection) { client['error-propagation-prose-test'] }
22+
23+
after do
24+
admin_client.command(configureFailPoint: 'failCommand', mode: 'off')
25+
rescue Mongo::Error
26+
# Ignore cleanup failures.
27+
end
28+
29+
# Case 1: Test that drivers return the correct error when receiving
30+
# only errors without NoWritesPerformed.
31+
context 'Case 1: only errors without NoWritesPerformed' do
32+
it 'returns the most recent error (10107)' do
33+
# Step 2: Configure a fail point with error code 91 and
34+
# RetryableError + SystemOverloadedError labels.
35+
admin_client.command(
36+
configureFailPoint: 'failCommand',
37+
mode: { times: 1 },
38+
data: {
39+
failCommands: [ 'insert' ],
40+
errorCode: 91,
41+
errorLabels: %w[RetryableError SystemOverloadedError]
42+
}
43+
)
44+
45+
# Step 3: Via CommandFailedEvent, configure a fail point with
46+
# error code 10107 once the 91 error is observed.
47+
failpoint_set = false
48+
subscriber = Mrss::EventSubscriber.new
49+
client.subscribe(Mongo::Monitoring::COMMAND, subscriber)
50+
51+
allow(subscriber).to receive(:failed).and_wrap_original do |m, event|
52+
m.call(event)
53+
if !failpoint_set && event.command_name == 'insert'
54+
failpoint_set = true
55+
admin_client.command(
56+
configureFailPoint: 'failCommand',
57+
mode: 'alwaysOn',
58+
data: {
59+
failCommands: [ 'insert' ],
60+
errorCode: 10_107,
61+
errorLabels: %w[RetryableError SystemOverloadedError]
62+
}
63+
)
64+
end
65+
end
66+
67+
# Step 4: Attempt an insertOne. Assert error code is 10107.
68+
error = nil
69+
begin
70+
collection.insert_one(x: 1)
71+
rescue Mongo::Error::OperationFailure => e
72+
error = e
73+
end
74+
75+
expect(error).not_to be_nil
76+
expect(error.code).to eq(10_107)
77+
end
78+
end
79+
80+
# Case 2: Test that drivers return the correct error when receiving
81+
# only errors with NoWritesPerformed.
82+
context 'Case 2: only errors with NoWritesPerformed' do
83+
it 'returns the first error (91)' do
84+
# Step 2: Configure a fail point with error code 91 and
85+
# RetryableError + SystemOverloadedError + NoWritesPerformed labels.
86+
admin_client.command(
87+
configureFailPoint: 'failCommand',
88+
mode: { times: 1 },
89+
data: {
90+
failCommands: [ 'insert' ],
91+
errorCode: 91,
92+
errorLabels: %w[RetryableError SystemOverloadedError NoWritesPerformed]
93+
}
94+
)
95+
96+
# Step 3: Via CommandFailedEvent, configure a fail point with
97+
# error code 10107 and NoWritesPerformed once the 91 error is observed.
98+
failpoint_set = false
99+
subscriber = Mrss::EventSubscriber.new
100+
client.subscribe(Mongo::Monitoring::COMMAND, subscriber)
101+
102+
allow(subscriber).to receive(:failed).and_wrap_original do |m, event|
103+
m.call(event)
104+
if !failpoint_set && event.command_name == 'insert'
105+
failpoint_set = true
106+
admin_client.command(
107+
configureFailPoint: 'failCommand',
108+
mode: 'alwaysOn',
109+
data: {
110+
failCommands: [ 'insert' ],
111+
errorCode: 10_107,
112+
errorLabels: %w[RetryableError SystemOverloadedError NoWritesPerformed]
113+
}
114+
)
115+
end
116+
end
117+
118+
# Step 4: Attempt an insertOne. Assert error code is 91.
119+
error = nil
120+
begin
121+
collection.insert_one(x: 1)
122+
rescue Mongo::Error::OperationFailure => e
123+
error = e
124+
end
125+
126+
expect(error).not_to be_nil
127+
expect(error.code).to eq(91)
128+
end
129+
end
130+
131+
# Case 3: Test that drivers return the correct error when receiving
132+
# some errors with NoWritesPerformed and some without.
133+
context 'Case 3: mixed errors with and without NoWritesPerformed' do
134+
it 'returns the error without NoWritesPerformed (91)' do
135+
# Step 2: Via CommandFailedEvent, configure a fail point with
136+
# error code 91 and NoWritesPerformed for subsequent retries.
137+
failpoint_set = false
138+
subscriber = Mrss::EventSubscriber.new
139+
client.subscribe(Mongo::Monitoring::COMMAND, subscriber)
140+
141+
allow(subscriber).to receive(:failed).and_wrap_original do |m, event|
142+
m.call(event)
143+
if !failpoint_set && event.command_name == 'insert'
144+
failpoint_set = true
145+
admin_client.command(
146+
configureFailPoint: 'failCommand',
147+
mode: 'alwaysOn',
148+
data: {
149+
failCommands: [ 'insert' ],
150+
errorCode: 91,
151+
errorLabels: %w[RetryableError SystemOverloadedError NoWritesPerformed]
152+
}
153+
)
154+
end
155+
end
156+
157+
# Step 3: Configure initial fail point with error code 91
158+
# WITHOUT NoWritesPerformed.
159+
admin_client.command(
160+
configureFailPoint: 'failCommand',
161+
mode: { times: 1 },
162+
data: {
163+
failCommands: [ 'insert' ],
164+
errorCode: 91,
165+
errorLabels: %w[RetryableError SystemOverloadedError]
166+
}
167+
)
168+
169+
# Step 4: Attempt an insertOne. Assert error code is 91 and
170+
# error does NOT contain NoWritesPerformed label.
171+
error = nil
172+
begin
173+
collection.insert_one(x: 1)
174+
rescue Mongo::Error::OperationFailure => e
175+
error = e
176+
end
177+
178+
expect(error).not_to be_nil
179+
expect(error.code).to eq(91)
180+
expect(error.label?('NoWritesPerformed')).to be false
181+
end
182+
end
183+
end

0 commit comments

Comments
 (0)