Skip to content

Commit 3f8a0fa

Browse files
authored
Merge pull request #406 from MLMI2-CSSI/fix_main
remove breaking changes that load on init
2 parents 35c2e12 + f2f3f10 commit 3f8a0fa

2 files changed

Lines changed: 59 additions & 39 deletions

File tree

foundry/foundry.py

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -50,27 +50,17 @@ class Foundry(FoundryBase):
5050
auths: Any
5151

5252
def __init__(
53-
self, name=None, no_browser=False, no_local_server=False, index="mdf", authorizers=None,
54-
download=True, globus=True, verbose=False, metadata=None, interval=10,
55-
**data
53+
self, no_browser=False, no_local_server=False, index="mdf", authorizers=None, **data
5654
):
5755
"""Initialize a Foundry client
5856
Args:
59-
name (str): Name of the foundry dataset. If not supplied, metadata will not be loaded into
60-
the Foundry object
6157
no_browser (bool): Whether to open the browser for the Globus Auth URL.
6258
no_local_server (bool): Whether a local server is available.
6359
This should be `False` when on remote server (e.g., Google Colab).
6460
index (str): Index to use for search and data publication. Choices `mdf` or `mdf-test`
6561
authorizers (dict): A dictionary of authorizers to use, following the `mdf_toolbox` format
66-
download (bool): If True, download the data associated with the package (default is True)
67-
globus (bool): If True, download using Globus, otherwise https
68-
verbose (bool): If True print additional debug information
69-
metadata (dict): **For debug purposes.** A search result analog to prepopulate metadata.
70-
interval (int): How often to poll Globus to check if transfers are complete
7162
data (dict): Other arguments, e.g., results from an MDF search result that are used
7263
to populate Foundry metadata fields
73-
7464
Returns:
7565
an initialized and authenticated Foundry client
7666
"""
@@ -155,29 +145,20 @@ def __init__(
155145
force_login=False,
156146
)
157147

158-
if name is not None:
159-
self._load(name=name,
160-
download=download,
161-
globus=globus,
162-
verbose=verbose,
163-
metadata=metadata,
164-
authorizers=authorizers,
165-
interval=interval)
166-
167-
def _load(self, name, download=True, globus=True, verbose=False, metadata=None, authorizers=None, interval=None):
148+
def load(self, name, download=True, globus=False, verbose=False, metadata=None, authorizers=None, **kwargs):
168149
"""Load the metadata for a Foundry dataset into the client
169150
Args:
170151
name (str): Name of the foundry dataset
171152
download (bool): If True, download the data associated with the package (default is True)
172153
globus (bool): If True, download using Globus, otherwise https
173154
verbose (bool): If True print additional debug information
174155
metadata (dict): **For debug purposes.** A search result analog to prepopulate metadata.
156+
Keyword Args: (TODO: make this a regular arg instead?)
175157
interval (int): How often to poll Globus to check if transfers are complete
176158
177159
Returns:
178160
self
179161
"""
180-
181162
# handle empty dataset name (was returning all the datasets)
182163
if not name:
183164
raise ValueError("load: No dataset name is given")
@@ -220,8 +201,9 @@ def _load(self, name, download=True, globus=True, verbose=False, metadata=None,
220201
self.dataset = FoundryDataset(**res['dataset'])
221202

222203
if download: # Add check for package existence
204+
# TODO: perhaps change interval here
223205
self.download(
224-
interval=interval, globus=globus, verbose=verbose
206+
interval=kwargs.get("interval", 10), globus=globus, verbose=verbose
225207
)
226208

227209
return self
@@ -425,6 +407,8 @@ def publish_dataset(
425407
self.connect_client.set_project_block(
426408
self.config.metadata_key, foundry_metadata)
427409

410+
# NOTE: need to add data_source and set_block before submit_dataset() is called -- so might want to upload
411+
# from here and not from within submit_dataset()?
428412
# upload via HTTPS if specified
429413
if https_data_path:
430414
# gather auth'd clients necessary for publication to endpoint
@@ -437,6 +421,20 @@ def publish_dataset(
437421
)
438422
# upload (ie publish) data to endpoint
439423
globus_data_source = upload_to_endpoint(pub_auths, https_data_path, endpoint_id)
424+
425+
"""
426+
# proposed solution
427+
.
428+
.
429+
self.connect_client.set_project_block()
430+
# auths handled within MDF? attempt this
431+
# consider renaming to "https_upload" or similar
432+
if https_data_path:
433+
globus_data_source = self.connect_client.upload_to_endpoint(https_data_path) #leave endpoint_id as an option
434+
self.connect_client.add_data_source(globus_data_source)
435+
.
436+
.
437+
"""
440438
# set Globus data source URL with MDF
441439
self.connect_client.add_data_source(globus_data_source)
442440
# set dataset name using the title if an abbreviated short_name isn't specified

tests/test_foundry.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -207,33 +207,33 @@ def _delete_test_data(foundry_obj):
207207

208208

209209
def test_foundry_init():
210-
f = Foundry(test_dataset, download=False, authorizers=auths)
210+
f = Foundry(authorizers=auths)
211211
assert isinstance(f.forge_client, Forge)
212212
assert isinstance(f.connect_client, MDFConnectClient)
213213

214214
if not is_gha:
215215
assert isinstance(f.dlhub_client, DLHubClient)
216216

217-
f2 = Foundry(test_dataset, download=False, authorizers=auths, no_browser=False, no_local_server=True)
217+
f2 = Foundry(authorizers=auths, no_browser=False, no_local_server=True)
218218
assert isinstance(f2.dlhub_client, DLHubClient)
219219
assert isinstance(f2.forge_client, Forge)
220220
assert isinstance(f2.connect_client, MDFConnectClient)
221221

222-
f3 = Foundry(test_dataset, download=False, authorizers=auths, no_browser=True, no_local_server=False)
222+
f3 = Foundry(authorizers=auths, no_browser=True, no_local_server=False)
223223
assert isinstance(f3.dlhub_client, DLHubClient)
224224
assert isinstance(f3.forge_client, Forge)
225225
assert isinstance(f3.connect_client, MDFConnectClient)
226226

227227

228228
def test_list():
229-
f = Foundry(test_dataset, download=False, authorizers=auths)
229+
f = Foundry(authorizers=auths)
230230
ds = f.list()
231231
assert isinstance(ds, pd.DataFrame)
232232
assert len(ds) > 0
233233

234234

235235
def test_search():
236-
f = Foundry(test_dataset, download=False, authorizers=auths)
236+
f = Foundry(authorizers=auths)
237237
q = "Elwood"
238238
ds = f.search(q)
239239

@@ -245,20 +245,25 @@ def test_search():
245245

246246

247247
def test_metadata_pull():
248-
f = Foundry(test_dataset, download=False, authorizers=auths)
248+
f = Foundry(authorizers=auths)
249+
assert f.dc == {}
250+
f = f.load(test_dataset, download=False, authorizers=auths)
249251
assert f.dc["titles"][0]["title"] == expected_title
250252

251253

252254
def test_download_https():
253-
f = Foundry(test_dataset, download=True, globus=False, authorizers=auths)
255+
f = Foundry(authorizers=auths)
254256
_delete_test_data(f)
257+
f = f.load(test_dataset, download=True, globus=False, authorizers=auths)
255258

256259
assert f.dc["titles"][0]["title"] == expected_title
257260
_delete_test_data(f)
258261

259262

260263
def test_dataframe_load():
261-
f = Foundry(test_dataset, download=True, globus=False, authorizers=auths)
264+
f = Foundry(authorizers=auths)
265+
_delete_test_data(f)
266+
f = f.load(test_dataset, download=True, globus=False, authorizers=auths)
262267

263268
res = f.load_data()
264269
X, y = res['train']
@@ -271,7 +276,9 @@ def test_dataframe_load():
271276

272277

273278
def test_dataframe_load_split():
274-
f = Foundry(test_dataset, download=True, globus=False, authorizers=auths)
279+
f = Foundry(authorizers=auths)
280+
_delete_test_data(f)
281+
f = f.load(test_dataset, download=True, globus=False, authorizers=auths)
275282

276283
res = f.load_data(splits=['train'])
277284
X, y = res['train']
@@ -284,7 +291,9 @@ def test_dataframe_load_split():
284291

285292

286293
def test_dataframe_load_split_wrong_split_name():
287-
f = Foundry(test_dataset, download=True, globus=False, authorizers=auths)
294+
f = Foundry(authorizers=auths)
295+
_delete_test_data(f)
296+
f = f.load(test_dataset, download=True, globus=False, authorizers=auths)
288297

289298
with pytest.raises(Exception) as exc_info:
290299
f.load_data(splits=['chewbacca'])
@@ -297,15 +306,19 @@ def test_dataframe_load_split_wrong_split_name():
297306

298307
@pytest.mark.skip(reason='No clear examples of datasets without splits - likely to be protected against soon.')
299308
def test_dataframe_load_split_but_no_splits():
300-
f = Foundry(test_dataset, download=True, globus=False, authorizers=auths)
309+
f = Foundry(authorizers=auths)
310+
_delete_test_data(f)
311+
f = f.load(test_dataset, download=True, globus=False, authorizers=auths)
301312

302313
with pytest.raises(ValueError):
303314
f.load_data(splits=['train'])
304315
_delete_test_data(f)
305316

306317

307318
def test_dataframe_load_doi():
308-
f = Foundry(test_doi, download=True, globus=False, authorizers=auths)
319+
f = Foundry(authorizers=auths)
320+
_delete_test_data(f)
321+
f = f.load(test_doi, download=True, globus=False, authorizers=auths)
309322

310323
res = f.load_data()
311324
X, y = res['train']
@@ -319,16 +332,21 @@ def test_dataframe_load_doi():
319332

320333
@pytest.mark.skipif(bool(is_gha), reason="Test does not succeed on GHA - no Globus endpoint")
321334
def test_download_globus():
322-
f = Foundry(test_dataset, download=True, authorizers=auths, no_browser=True, no_local_server=True)
335+
f = Foundry(authorizers=auths, no_browser=True, no_local_server=True)
323336
_delete_test_data(f)
337+
f = f.load(test_dataset, download=True)
324338

325339
assert f.dc["titles"][0]["title"] == expected_title
326340
_delete_test_data(f)
327341

328342

329343
@pytest.mark.skipif(bool(is_gha), reason="Test does not succeed on GHA - no Globus endpoint")
330344
def test_globus_dataframe_load():
331-
f = Foundry(test_dataset, download=True, authorizers=auths, no_browser=True, no_local_server=True)
345+
f = Foundry(authorizers=auths, no_browser=True, no_local_server=True)
346+
347+
_delete_test_data(f)
348+
349+
f = f.load(test_dataset, download=True)
332350

333351
res = f.load_data()
334352
X, y = res['train']
@@ -482,7 +500,9 @@ def test_check_status():
482500

483501

484502
def test_to_pytorch():
485-
f = Foundry(test_dataset, download=True, globus=False, authorizers=auths, no_browser=True, no_local_server=True)
503+
f = Foundry(authorizers=auths, no_browser=True, no_local_server=True)
504+
_delete_test_data(f)
505+
f = f.load(test_dataset, download=True, globus=False, authorizers=auths)
486506

487507
raw = f.load_data()
488508

@@ -495,7 +515,9 @@ def test_to_pytorch():
495515

496516

497517
def test_to_tensorflow():
498-
f = Foundry(test_dataset, download=True, globus=False, authorizers=auths, no_browser=True, no_local_server=True)
518+
f = Foundry(authorizers=auths, no_browser=True, no_local_server=True)
519+
_delete_test_data(f)
520+
f = f.load(test_dataset, download=True, globus=False, authorizers=auths)
499521

500522
raw = f.load_data()
501523

0 commit comments

Comments
 (0)