Skip to content

Commit f92a8c3

Browse files
Remove extra retries on 503 for DBFS delete. (#283)
* Removed the extra retries on 503 for DBFS delete.
* Refactored DBFS partial delete exception handling and added an additional error message.
1 parent ebf4879 commit f92a8c3

2 files changed

Lines changed: 6 additions & 46 deletions

File tree

databricks_cli/dbfs/api.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import tempfile
2929

3030
import re
31-
import time
3231
import click
3332

3433
from requests.exceptions import HTTPError
@@ -39,8 +38,6 @@
3938
from databricks_cli.dbfs.exceptions import LocalFileExistsException
4039

4140
BUFFER_SIZE_BYTES = 2**20
42-
DELETE_MAX_CONSECUTIVE_503_RETRIES = 3
43-
DELETE_503_RETRY_DELAY_MILLIS = 30 * 1000
4441

4542

4643
class ParseException(Exception):
@@ -77,14 +74,12 @@ def __eq__(self, other):
7774
class DbfsErrorCodes(object):
7875
RESOURCE_DOES_NOT_EXIST = 'RESOURCE_DOES_NOT_EXIST'
7976
RESOURCE_ALREADY_EXISTS = 'RESOURCE_ALREADY_EXISTS'
80-
TEMPORARILY_UNAVAILABLE = 'TEMPORARILY_UNAVAILABLE'
8177
PARTIAL_DELETE = 'PARTIAL_DELETE'
8278

8379

8480
class DbfsApi(object):
85-
def __init__(self, api_client, delete_retry_delay_millis=DELETE_503_RETRY_DELAY_MILLIS):
81+
def __init__(self, api_client):
8682
self.client = DbfsService(api_client)
87-
self.delete_retry_delay_millis = delete_retry_delay_millis
8883

8984
def list_files(self, dbfs_path, headers=None):
9085
list_response = self.client.list(dbfs_path.absolute_path, headers=headers)
@@ -148,7 +143,6 @@ def get_num_files_deleted(partial_delete_error):
148143
return int(m.group(1))
149144

150145
def delete(self, dbfs_path, recursive, headers=None):
151-
num_consecutive_503_retries = 0
152146
num_files_deleted = 0
153147
while True:
154148
try:
@@ -167,17 +161,10 @@ def delete(self, dbfs_path, recursive, headers=None):
167161
num_files_deleted), nl=False)
168162
except ParseException:
169163
click.echo("\rDelete in progress...\033[K", nl=False)
170-
num_consecutive_503_retries = 0
171164
continue
172-
# Retry at most DELETE_MAX_CONSECUTIVE_503_ERRORS times for other 503 errors
173-
elif num_consecutive_503_retries < DELETE_MAX_CONSECUTIVE_503_RETRIES:
174-
num_consecutive_503_retries += 1
175-
time.sleep(float(self.delete_retry_delay_millis) / 1000)
176-
continue
177-
else:
178-
raise e
179-
else:
180-
raise e
165+
click.echo("\rDeleted at least {} files but interrupted by error.\033[K".format(
166+
num_files_deleted))
167+
raise e
181168
break
182169
click.echo("\rDelete finished successfully.\033[K")
183170

tests/dbfs/test_api.py

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,6 @@ def get_resource_does_not_exist_exception():
4848
return requests.exceptions.HTTPError(response=response)
4949

5050

51-
def get_temporarily_unavailable_exception():
52-
response = requests.Response()
53-
response.status_code = 503
54-
response._content = ('{{"error_code": "{}"}}'.format(api.DbfsErrorCodes.TEMPORARILY_UNAVAILABLE)).encode() # NOQA
55-
return requests.exceptions.HTTPError(response=response)
56-
57-
5851
def get_partial_delete_exception(message="[...] operation has deleted 10 files [...]"):
5952
response = requests.Response()
6053
response.status_code = 503
@@ -182,33 +175,13 @@ def test_cat(self, dbfs_api):
182175

183176
def test_partial_delete(self, dbfs_api):
184177
e_partial_delete = get_partial_delete_exception()
185-
e_temporarily_unavailable = get_temporarily_unavailable_exception()
186-
# Simulate partial deletes and 503 exceptions followed by a full successful delete
187-
exception_sequence = \
188-
[e_temporarily_unavailable, e_partial_delete, e_partial_delete] + \
189-
[e_temporarily_unavailable] * api.DELETE_MAX_CONSECUTIVE_503_RETRIES + \
190-
[e_partial_delete, None]
178+
# Simulate 3 partial deletes followed by a full successful delete
179+
exception_sequence = [e_partial_delete, e_partial_delete, e_partial_delete, None]
191180
dbfs_api.client.delete = mock.Mock(side_effect=exception_sequence)
192181
dbfs_api.delete_retry_delay_millis = 1
193182
# Should succeed
194183
dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
195184

196-
def test_partial_delete_service_unavailable(self, dbfs_api):
197-
e_partial_delete = get_partial_delete_exception()
198-
e_temporarily_unavailable = get_temporarily_unavailable_exception()
199-
# Simulate more than api.DELETE_MAX_CONSECUTIVE_503_ERRORS 503 errors that are not partial
200-
# deletes (error_code != PARTIAL_DELETE)
201-
exception_sequence = \
202-
[e_partial_delete] + \
203-
[e_temporarily_unavailable] * (api.DELETE_MAX_CONSECUTIVE_503_RETRIES + 1) + \
204-
[e_partial_delete, None]
205-
dbfs_api.client.delete = mock.Mock(side_effect=exception_sequence)
206-
dbfs_api.delete_retry_delay_millis = 1
207-
with pytest.raises(e_temporarily_unavailable.__class__) as thrown:
208-
dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
209-
# Should raise the same e_temporarily_unavailable exception instance
210-
assert thrown.value == e_temporarily_unavailable
211-
212185
def test_partial_delete_exception_message_parse_error(self, dbfs_api):
213186
message = "unexpected partial delete exception message"
214187
e_partial_delete = get_partial_delete_exception(message)

0 commit comments

Comments
 (0)