1313from dss .error import DSSException , dss_handler
1414from dss .storage .blobstore import test_object_exists
1515from dss .storage .hcablobstore import BlobStore , compose_blob_key
16- from dss .storage .identifiers import CollectionFQID , CollectionTombstoneID
16+ from dss .storage .identifiers import CollectionFQID , CollectionTombstoneID , COLLECTION_PREFIX
1717from dss .util import security , hashabledict , UrlBuilder
1818from dss .util .version import datetime_to_version_format
1919from dss .storage .blobstore import idempotent_save
20-
20+ from dss . collections import owner_lookup
2121from cloud_blobstore import BlobNotFoundError
2222
2323MAX_METADATA_SIZE = 1024 * 1024
2424
2525logger = logging .getLogger (__name__ )
2626
27+
2728def get_impl (uuid : str , replica : str , version : str = None ):
2829 uuid = uuid .lower ()
2930 bucket = Replica [replica ].bucket
@@ -46,43 +47,41 @@ def get_impl(uuid: str, replica: str, version: str = None):
4647 raise DSSException (404 , "not_found" , "Could not find collection for UUID {}" .format (uuid ))
4748 return json .loads (collection_blob )
4849
49- def fetch_collections (handle , bucket , collection_keys ):
50- authenticated_user_email = security .get_token_email (request .token_info )
51-
52- all_collections = []
53- for key in collection_keys :
54- uuid , version = key [len ('collections/' ):].split ('.' , 1 )
55- assert version != 'dead'
56- collection = json .loads (handle .get (bucket , key ))
57- if collection ['owner' ] == authenticated_user_email :
58- all_collections .append ({'collection_uuid' : uuid ,
59- 'collection_version' : version ,
60- 'collection' : collection })
61- return all_collections
6250
@dss_handler
@security.authorized_group_required(['hca'])
def list_collections(per_page: int, start_at: int = 0):
    """
    Return a list of a user's collections.

    Collection uuids are indexed and called by the user's email in a dynamoDB table.

    :param int per_page: # of collections returned per paged response.
    :param int start_at: Where the next chunk of paged response should start at.
    :return: A dictionary containing a list of dictionaries looking like:
        {'collections': [{'uuid': uuid, 'version': version}, {'uuid': uuid, 'version': version}, ... , ...]}
    """
    # TODO: Replica is unused, so this does not use replica. Appropriate?
    owner = security.get_token_email(request.token_info)

    # Resolve every fqid indexed under this owner into its uuid/version parts.
    collections = [
        {'uuid': fqid.uuid, 'version': fqid.version}
        for fqid in (
            CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{key}')
            for key in owner_lookup.get_collection_fqids_for_owner(owner)
        )
    ]

    remaining = len(collections) - start_at
    if remaining <= per_page:
        # single response returning all collections (or those remaining)
        return jsonify({'collections': collections[start_at:]}), requests.codes.ok

    # paged response: hand the client a Link header pointing at the next chunk
    next_url = UrlBuilder(request.url)
    next_url.replace_query("start_at", str(start_at + per_page))
    page = collections[start_at:start_at + per_page]
    response = make_response(jsonify({'collections': page}), requests.codes.partial)
    response.headers['Link'] = f"<{next_url}>; rel='next'"
    return response
84+
8685
8786@dss_handler
8887@security .authorized_group_required (['hca' ])
@@ -93,6 +92,7 @@ def get(uuid: str, replica: str, version: str = None):
9392 raise DSSException (requests .codes .forbidden , "forbidden" , f"Collection access denied" )
9493 return collection_body
9594
95+
9696@dss_handler
9797@security .authorized_group_required (['hca' ])
9898def put (json_request_body : dict , replica : str , uuid : str , version : str ):
@@ -107,11 +107,16 @@ def put(json_request_body: dict, replica: str, uuid: str, version: str):
107107 timestamp = datetime .datetime .utcnow ()
108108 version = datetime_to_version_format (timestamp )
109109 collection_version = version
110+ # update dynamoDB; used to speed up lookup time; will not update if owner already associated w/uuid
111+ owner_lookup .put_collection (owner = authenticated_user_email ,
112+ collection_fqid = str (CollectionFQID (collection_uuid , collection_version )))
113+ # add the collection file to the bucket
110114 handle .upload_file_handle (Replica [replica ].bucket ,
111115 CollectionFQID (collection_uuid , collection_version ).to_key (),
112116 io .BytesIO (json .dumps (collection_body ).encode ("utf-8" )))
113117 return jsonify (dict (uuid = collection_uuid , version = collection_version )), requests .codes .created
114118
119+
115120@dss_handler
116121@security .authorized_group_required (['hca' ])
117122def patch (uuid : str , json_request_body : dict , replica : str , version : str ):
@@ -143,12 +148,14 @@ def patch(uuid: str, json_request_body: dict, replica: str, version: str):
143148 io .BytesIO (json .dumps (collection ).encode ("utf-8" )))
144149 return jsonify (dict (uuid = uuid , version = new_collection_version )), requests .codes .ok
145150
151+
146152def _dedpuplicate_contents (contents : List ) -> List :
147153 dedup_collection : OrderedDict [int , dict ] = OrderedDict ()
148154 for item in contents :
149155 dedup_collection [hash (tuple (sorted (item .items ())))] = item
150156 return list (dedup_collection .values ())
151157
158+
152159@dss_handler
153160@security .authorized_group_required (['hca' ])
154161def delete (uuid : str , replica : str ):
@@ -175,9 +182,11 @@ def delete(uuid: str, replica: str):
175182 f"collection tombstone with UUID { uuid } already exists" )
176183 status_code = requests .codes .ok
177184 response_body = dict () # type: dict
178-
185+ # update dynamoDB
186+ owner_lookup .delete_collection_uuid (owner = authenticated_user_email , uuid = uuid )
179187 return jsonify (response_body ), status_code
180188
189+
181190@functools .lru_cache (maxsize = 64 )
182191def get_json_metadata (entity_type : str , uuid : str , version : str , replica : Replica , blobstore_handle : BlobStore ):
183192 try :
@@ -198,6 +207,7 @@ def get_json_metadata(entity_type: str, uuid: str, version: str, replica: Replic
198207 "invalid_link" ,
199208 "Could not find file for UUID {}" .format (uuid ))
200209
210+
201211def resolve_content_item (replica : Replica , blobstore_handle : BlobStore , item : dict ):
202212 try :
203213 if item ["type" ] in {"file" , "bundle" , "collection" }:
@@ -221,6 +231,7 @@ def resolve_content_item(replica: Replica, blobstore_handle: BlobStore, item: di
221231 'Error while parsing the link "{}": {}: {}' .format (item , type (e ).__name__ , e )
222232 )
223233
234+
224235def verify_collection (contents : List [dict ], replica : Replica , blobstore_handle : BlobStore , batch_size = 64 ):
225236 """
226237 Given user-supplied collection contents that pass schema validation, resolve all entities in the collection and
0 commit comments