Skip to content

Commit deb04f3

Browse files
authored
feat: add object restore functionality (#677)
* feat: add object restore functionality * add unit tests for object restore * lint * more lint * add error test * lint * additional database tests * add test for no generation * fix no generation return * simplify filter and add hard delete time * add ability to LIST soft deleted objects * fixes * add ability to get soft deleted object * lint * fixes * linter * boolean fixes * cover additional failure case * sort by generation
1 parent 699d50e commit deb04f3

9 files changed

Lines changed: 488 additions & 10 deletions

testbench/database.py

Lines changed: 154 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
import collections
1616
import copy
17+
import datetime
1718
import json
1819
import os
1920
import pathlib
@@ -37,11 +38,13 @@ def __init__(
3738
rewrites,
3839
retry_tests,
3940
supported_methods,
41+
soft_deleted_objects,
4042
):
4143
self._resources_lock = threading.RLock()
4244
self._buckets = buckets
4345
self._objects = objects
4446
self._live_generations = live_generations
47+
self._soft_deleted_objects = soft_deleted_objects
4548

4649
self._uploads_lock = threading.RLock()
4750
self._uploads = uploads
@@ -58,14 +61,15 @@ def __init__(
5861

5962
@classmethod
6063
def init(cls):
61-
return cls({}, {}, {}, {}, {}, {}, [])
64+
return cls({}, {}, {}, {}, {}, {}, [], {})
6265

6366
def clear(self):
6467
"""Clear all data except for the supported method list."""
6568
with self._resources_lock:
6669
self._buckets = {}
6770
self._objects = {}
6871
self._live_generations = {}
72+
self._soft_deleted_objects = {}
6973
with self._uploads_lock:
7074
self._uploads = {}
7175
with self._rewrites_lock:
@@ -101,6 +105,7 @@ def insert_bucket(self, bucket, context):
101105
self._buckets[bucket.metadata.name] = bucket
102106
self._objects[bucket.metadata.name] = {}
103107
self._live_generations[bucket.metadata.name] = {}
108+
self._soft_deleted_objects[bucket.metadata.name] = {}
104109

105110
def list_bucket(self, project_id, prefix, context):
106111
with self._resources_lock:
@@ -133,6 +138,7 @@ def delete_bucket(self, bucket_name, context, preconditions=[]):
133138
del self._buckets[bucket.metadata.name]
134139
del self._objects[bucket.metadata.name]
135140
del self._live_generations[bucket.metadata.name]
141+
del self._soft_deleted_objects[bucket.metadata.name]
136142

137143
def insert_test_bucket(self):
138144
"""Automatically create a bucket if needed.
@@ -173,6 +179,7 @@ def __extract_list_object_request_grpc(cls, request):
173179
request.lexicographic_end,
174180
request.include_trailing_delimiter,
175181
request.match_glob,
182+
request.soft_deleted,
176183
)
177184

178185
@classmethod
@@ -186,6 +193,7 @@ def __extract_list_object_request(cls, request, context):
186193
end_offset = request.args.get("endOffset")
187194
include_trailing_delimiter = request.args.get("includeTrailingDelimiter", False)
188195
match_glob = request.args.get("matchGlob", None)
196+
soft_deleted = request.args.get("softDeleted", False)
189197
return (
190198
delimiter,
191199
prefix,
@@ -194,6 +202,7 @@ def __extract_list_object_request(cls, request, context):
194202
end_offset,
195203
include_trailing_delimiter,
196204
match_glob,
205+
soft_deleted,
197206
)
198207

199208
def __get_live_generation(self, bucket_name, object_name, context):
@@ -208,9 +217,66 @@ def __del_live_generation(self, bucket_name, object_name, context):
208217
bucket_key = self.__bucket_key(bucket_name, context)
209218
self._live_generations[bucket_key].pop(object_name, None)
210219

220+
def __soft_delete_object(
    self, bucket_name, object_name, blob, retention_duration, context
):
    """Record `blob` as a soft-deleted generation of `object_name`.

    The blob's metadata is stamped with `soft_delete_time` (the current UTC
    time) and `hard_delete_time` (that time plus `retention_duration`
    seconds), and the blob is appended to the per-object list kept in
    `self._soft_deleted_objects` for the bucket.
    """
    bucket_key = self.__bucket_key(bucket_name, context)
    per_bucket = self._soft_deleted_objects[bucket_key]
    # Start an empty history the first time this object name is soft deleted.
    if per_bucket.get(object_name) is None:
        per_bucket[object_name] = []
    deleted_at = datetime.datetime.now(datetime.timezone.utc)
    expires_at = deleted_at + datetime.timedelta(seconds=retention_duration)
    blob.metadata.soft_delete_time.FromDatetime(deleted_at)
    blob.metadata.hard_delete_time.FromDatetime(expires_at)
    per_bucket[object_name].append(blob)
231+
232+
def __remove_expired_objects_from_soft_delete(
    self, bucket_name, object_name, context
):
    """Drop soft-deleted generations of `object_name` whose hard delete time has passed.

    Only the entry for `object_name` in the bucket's soft-deleted map is
    touched; if the name has no entry nothing happens. A blob is kept only
    while the current time is strictly before its `hard_delete_time`.
    """
    bucket_key = self.__bucket_key(bucket_name, context)
    # `Timestamp.ToDatetime()` returns a timezone-naive datetime expressed in
    # UTC, so the reference time must be naive UTC too. A plain
    # `datetime.now()` is local time and would shift expiry by the host's
    # UTC offset.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    soft_deleted = self._soft_deleted_objects[bucket_key].get(object_name)
    if soft_deleted is not None:
        self._soft_deleted_objects[bucket_key][object_name] = [
            blob
            for blob in soft_deleted
            if now < blob.metadata.hard_delete_time.ToDatetime()
        ]
245+
246+
def __remove_restored_soft_deleted_object(
    self, bucket_name, object_name, generation, context
):
    """Rewrite the soft-deleted list of `object_name` after a restore.

    The list is replaced by the entries whose metadata generation equals
    `generation`; every other entry is discarded. Nothing happens when the
    object name has no soft-deleted entry.

    NOTE(review): the method name suggests the entry matching `generation`
    should be the one removed, yet the comparison keeps it and drops the
    rest. Confirm the intended direction against the caller before relying
    on this.
    """
    bucket_key = self.__bucket_key(bucket_name, context)
    per_bucket = self._soft_deleted_objects[bucket_key]
    if per_bucket.get(object_name) is None:
        return
    per_bucket[object_name] = [
        entry
        for entry in per_bucket[object_name]
        if entry.metadata.generation == generation
    ]
257+
258+
def __get_soft_deleted_object(self, bucket_name, object_name, generation, context):
    """Look up one soft-deleted generation of `object_name`.

    Returns the first soft-deleted blob whose metadata generation equals
    `generation`. When there is no such blob the result of
    `testbench.error.notfound(object_name, context)` is returned instead.
    """
    bucket_key = self.__bucket_key(bucket_name, context)
    candidates = self._soft_deleted_objects[bucket_key].get(object_name, [])
    for candidate in candidates:
        if candidate.metadata.generation == generation:
            return candidate
    return testbench.error.notfound(object_name, context)
267+
268+
def __get_all_soft_deleted_objects(self, bucket_name, context):
    """Return every soft-deleted blob of the bucket, ordered by generation.

    The per-object lists stored for the bucket are flattened into one new
    list, which is sorted by ascending `metadata.generation`.
    """
    bucket_key = self.__bucket_key(bucket_name, context)
    flattened = [
        blob
        for per_object in self._soft_deleted_objects[bucket_key].values()
        for blob in per_object
    ]
    return sorted(flattened, key=lambda blob: blob.metadata.generation)
275+
211276
def list_object(self, request, bucket_name, context):
212277
with self._resources_lock:
213278
bucket = self.__get_bucket_for_object(bucket_name, context)
279+
bucket_with_metadata = self.get_bucket(bucket_name, context)
214280
(
215281
delimiter,
216282
prefix,
@@ -219,14 +285,29 @@ def list_object(self, request, bucket_name, context):
219285
end_offset,
220286
include_trailing_delimiter,
221287
match_glob,
288+
soft_deleted,
222289
) = self.__extract_list_object_request(request, context)
223290
items = []
224291
prefixes = set()
225-
for obj in bucket.values():
292+
293+
if (
294+
soft_deleted
295+
and not bucket_with_metadata.metadata.HasField("soft_delete_policy")
296+
) or (soft_deleted and versions):
297+
return testbench.error.invalid("bad request", context)
298+
299+
objects = bucket.values()
300+
if soft_deleted:
301+
objects = self.__get_all_soft_deleted_objects(bucket_name, context)
302+
303+
for obj in objects:
226304
generation = obj.metadata.generation
227305
name = obj.metadata.name
228-
if not versions and generation != self.__get_live_generation(
229-
bucket_name, name, context
306+
if (
307+
not soft_deleted
308+
and not versions
309+
and generation
310+
!= self.__get_live_generation(bucket_name, name, context)
230311
):
231312
continue
232313
if name.find(prefix) != 0:
@@ -282,12 +363,27 @@ def __get_object(
282363
return blob, live_generation
283364

284365
def get_object(
285-
self, bucket_name, object_name, context=None, generation=None, preconditions=[]
366+
self,
367+
bucket_name,
368+
object_name,
369+
context=None,
370+
generation=None,
371+
preconditions=[],
372+
soft_deleted=False,
286373
):
287374
with self._resources_lock:
288-
blob, _ = self.__get_object(
289-
bucket_name, object_name, context, generation, preconditions
290-
)
375+
blob = None
376+
if not soft_deleted:
377+
blob, _ = self.__get_object(
378+
bucket_name, object_name, context, generation, preconditions
379+
)
380+
else:
381+
bucket_with_metadata = self.get_bucket(bucket_name, context)
382+
if not bucket_with_metadata.metadata.HasField("soft_delete_policy"):
383+
testbench.error.invalid("SoftDeletePolicyRequired", context)
384+
blob = self.__get_soft_deleted_object(
385+
bucket_name, object_name, int(generation), context
386+
)
291387
# return a snapshot copy of the blob/blob.metadata
292388
if blob is None:
293389
return None
@@ -336,6 +432,15 @@ def delete_object(
336432
if generation == 0 or live_generation == generation:
337433
self.__del_live_generation(bucket_name, object_name, context)
338434
bucket = self.__get_bucket_for_object(bucket_name, context)
435+
bucket_with_metadata = self.get_bucket(bucket_name, context)
436+
if bucket_with_metadata.metadata.HasField("soft_delete_policy"):
437+
self.__soft_delete_object(
438+
bucket_name,
439+
object_name,
440+
blob,
441+
bucket_with_metadata.metadata.soft_delete_policy.retention_duration.seconds,
442+
context,
443+
)
339444
bucket.pop("%s#%d" % (blob.metadata.name, blob.metadata.generation), None)
340445

341446
def do_update_object(
@@ -354,6 +459,47 @@ def do_update_object(
354459
)
355460
return update_fn(blob, live_generation)
356461

462+
def restore_object(
    self,
    bucket_name: str,
    object_name: str,
    generation: int,
    preconditions=[],
    context=None,
) -> T:
    """Restore one soft-deleted generation of an object as a new live object.

    Args:
        bucket_name: name of the bucket holding the object.
        object_name: name of the object to restore.
        generation: generation of the soft-deleted version to restore.
        preconditions: forwarded unchanged to `insert_object`.
        context: request context forwarded to the lookup and error helpers.

    Returns:
        The restored blob. Its create time is reset to the current UTC time,
        its soft/hard delete times are cleared, its metageneration is 1 and
        its generation is the soft-deleted generation plus one.

    The `testbench.error.*` helpers are called without returning their
    result, so they are presumably expected to abort the request by
    raising -- TODO confirm.
    """
    with self._resources_lock:
        bucket_with_metadata = self.get_bucket(bucket_name, context)
        if not bucket_with_metadata.metadata.HasField("soft_delete_policy"):
            testbench.error.invalid("SoftDeletePolicyRequired", context)
        bucket = self.__get_bucket_for_object(bucket_name, context)
        # A generation still stored in the bucket is live or noncurrent,
        # hence not restorable.
        if bucket.get("%s#%d" % (object_name, generation)) is not None:
            testbench.error.not_soft_deleted(context)

        self.__remove_expired_objects_from_soft_delete(
            bucket_name,
            object_name,
            context,
        )
        blob = self.__get_soft_deleted_object(
            bucket_name, object_name, generation, context
        )
        if blob is None:
            return blob

        blob.metadata.create_time.FromDatetime(
            datetime.datetime.now(datetime.timezone.utc)
        )
        blob.metadata.ClearField("soft_delete_time")
        # A live object must not keep advertising a pending hard delete.
        blob.metadata.ClearField("hard_delete_time")
        blob.metadata.metageneration = 1
        blob.metadata.generation = blob.metadata.generation + 1
        if bucket_with_metadata.metadata.autoclass.enabled:
            blob.metadata.storage_class = "STANDARD"
        self.insert_object(bucket_name, blob, context, preconditions)

        # Remove exactly the restored entry. Matching by identity is needed
        # because the blob's generation was bumped above, so filtering on the
        # requested generation would no longer find it, and the other
        # soft-deleted generations of this name must survive the restore.
        bucket_key = self.__bucket_key(bucket_name, context)
        remaining = self._soft_deleted_objects[bucket_key].get(object_name)
        if remaining is not None:
            self._soft_deleted_objects[bucket_key][object_name] = [
                entry for entry in remaining if entry is not blob
            ]
        return blob
502+
357503
# === UPLOAD === #
358504

359505
def get_upload(self, upload_id, context):

testbench/error.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,14 @@ def mismatch(
112112
generic(_simple_json_error(msg), rest_code, grpc_code, context)
113113

114114

115+
def not_soft_deleted(
    context, rest_code=412, grpc_code=grpc.StatusCode.FAILED_PRECONDITION
):
    """Report that an object is not soft deleted (it is live or noncurrent).

    Delegates to `generic` with a simple JSON error body carrying the message
    "objectNotSoftDeleted" and the given REST / gRPC status codes.
    """
    payload = _simple_json_error("objectNotSoftDeleted")
    generic(payload, rest_code, grpc_code, context)
121+
122+
115123
def notchanged(msg, context, rest_code=304, grpc_code=grpc.StatusCode.ABORTED):
116124
"""Error returned when if*NotMatch or If-None-Match pre-conditions fail."""
117125
generic(

testbench/grpc_server.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -689,6 +689,14 @@ def update_impl(blob, live_generation) -> storage_pb2.Object:
689689
def __get_bucket(self, bucket_name, context) -> storage_pb2.Bucket:
    """Return the metadata of the bucket the database holds under `bucket_name`."""
    bucket = self.db.get_bucket(bucket_name, context)
    return bucket.metadata
691691

692+
@retry_test(method="storage.objects.restore")
def RestoreObject(self, request, context):
    """Handle the gRPC RestoreObject call.

    Builds the preconditions from the request, asks the database to restore
    the requested bucket/object/generation and returns the metadata of the
    blob the database hands back.
    """
    preconditions = testbench.common.make_grpc_preconditions(request)
    restored = self.db.restore_object(
        request.bucket,
        request.object,
        request.generation,
        preconditions,
        context,
    )
    return restored.metadata
699+
692700
@retry_test(method="storage.objects.insert")
693701
def WriteObject(self, request_iterator, context):
694702
upload, is_resumable = gcs.upload.Upload.init_write_object_grpc(

testbench/rest_server.py

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -568,14 +568,20 @@ def object_delete(bucket_name, object_name):
568568
@gcs.route("/b/<bucket_name>/o/<path:object_name>")
569569
@retry_test(method="storage.objects.get")
570570
def object_get(bucket_name, object_name):
571+
soft_deleted = flask.request.args.get("softDeleted", False, bool)
572+
media = flask.request.args.get("alt", None)
573+
generation = flask.request.args.get("generation", None)
574+
if (soft_deleted and generation is None) or (soft_deleted and media == "media"):
575+
return testbench.error.invalid("invalid request", None)
576+
571577
blob = db.get_object(
572578
bucket_name,
573579
object_name,
574-
generation=flask.request.args.get("generation", None),
580+
generation=generation,
575581
preconditions=testbench.common.make_json_preconditions(flask.request),
576582
context=None,
583+
soft_deleted=soft_deleted,
577584
)
578-
media = flask.request.args.get("alt", None)
579585
if media is None or media == "json":
580586
projection = testbench.common.extract_projection(flask.request, "noAcl", None)
581587
fields = flask.request.args.get("fields", None)
@@ -773,6 +779,21 @@ def objects_rewrite(src_bucket_name, src_object_name, dst_bucket_name, dst_objec
773779
return response
774780

775781

782+
@gcs.route("/b/<bucket_name>/o/<path:object_name>/restore", methods=["POST"])
@retry_test(method="storage.objects.restore")
def object_restore(bucket_name, object_name):
    """Handle `POST /b/<bucket>/o/<object>/restore`.

    The `generation` query parameter is required and must be an integer;
    otherwise the result of `testbench.error.invalid("generation", None)` is
    returned. On success the restored object's REST metadata is returned,
    filtered by the requested projection (default "noAcl").
    """
    # With `type=int` the lookup yields the default (None) both when the
    # parameter is absent and when it is not a valid integer, so a malformed
    # value is reported as an invalid request instead of raising ValueError.
    generation = flask.request.args.get("generation", None, int)
    if generation is None:
        return testbench.error.invalid("generation", None)
    blob = db.restore_object(
        bucket_name,
        object_name,
        generation,
        testbench.common.make_json_preconditions(flask.request),
    )
    projection = testbench.common.extract_projection(flask.request, "noAcl", None)
    return testbench.common.filter_response_rest(blob.rest_metadata(), projection, None)
795+
796+
776797
# === OBJECT ACCESS CONTROL === #
777798

778799

0 commit comments

Comments
 (0)