Skip to content

Commit 5994c58

Browse files
Housekeeping: Remove unused molecules endpoints + others (#1068)
2 parents: 2d0ffff + e45342f; commit 5994c58

18 files changed

Lines changed: 255 additions & 1472 deletions

mp_api/client/core/client.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ def __init__(
198198
warnings.warn(
199199
"Ignoring `monty_decode`, as it is no longer a supported option in `mp_api`."
200200
"The client by default returns results consistent with `monty_decode=True`.",
201-
category=DeprecationWarning,
201+
category=MPRestWarning,
202202
stacklevel=2,
203203
)
204204

@@ -1360,7 +1360,7 @@ def new_str(self) -> str:
13601360

13611361
return (
13621362
f"\033[4m\033[1m{self.__class__.__name__}"
1363-
f"<{self.__class__.__base__.__name__}>\033[0;0m\033[0;0m"
1363+
f"<{orig_rester_name}>\033[0;0m\033[0;0m"
13641364
f"\n{extra}\n\n"
13651365
f"\033[1mFields not requested:\033[0;0m\n{fields_not_requested}"
13661366
)
@@ -1608,7 +1608,7 @@ def __getattr__(self, v: str):
16081608
self.sub_resters[v](
16091609
api_key=self.api_key,
16101610
endpoint=self.base_endpoint,
1611-
include_user_agent=self._include_user_agent,
1611+
include_user_agent=self.include_user_agent,
16121612
session=self.session,
16131613
use_document_model=self.use_document_model,
16141614
headers=self.headers,
@@ -1617,6 +1617,7 @@ def __getattr__(self, v: str):
16171617
force_renew=self.force_renew,
16181618
)
16191619
return self.sub_resters[v]
1620+
raise AttributeError(f"{self.__class__} has no attribute {v}")
16201621

16211622
def __dir__(self):
16221623
return dir(self.__class__) + list(self._sub_resters)

mp_api/client/mprester.py

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,25 +1070,28 @@ def get_entries_in_chemsys(
10701070
if isinstance(elements, str):
10711071
elements = elements.split("-")
10721072

1073-
elements_set = set(elements) # remove duplicate elements
1073+
# 9 elements would be sum_{i=1}^{9} (9 choose i) = 511
1074+
# From testing, this is the highest number of chemsys
1075+
# we can query before URI lengths are exceeded
1076+
if len(elements_set := set(elements)) > 9: # remove duplicate elements
1077+
raise MPRestError(
1078+
"Please specify fewer elements to query by, "
1079+
"or identify a subset of relevant chemical systems to query first."
1080+
)
10741081

10751082
all_chemsyses = [
10761083
"-".join(sorted(els))
10771084
for i in range(len(elements_set))
10781085
for els in itertools.combinations(elements_set, i + 1)
10791086
]
10801087

1081-
entries = []
1082-
1083-
entries.extend(
1084-
self.get_entries(
1085-
all_chemsyses,
1086-
compatible_only=compatible_only,
1087-
property_data=property_data,
1088-
conventional_unit_cell=conventional_unit_cell,
1089-
additional_criteria=additional_criteria or DEFAULT_THERMOTYPE_CRITERIA,
1090-
**kwargs,
1091-
)
1088+
entries = self.get_entries(
1089+
all_chemsyses,
1090+
compatible_only=compatible_only,
1091+
property_data=property_data,
1092+
conventional_unit_cell=conventional_unit_cell,
1093+
additional_criteria=additional_criteria or DEFAULT_THERMOTYPE_CRITERIA,
1094+
**kwargs,
10921095
)
10931096

10941097
if use_gibbs:
@@ -1255,21 +1258,41 @@ def get_charge_density_from_material_id(
12551258
task_id = latest_doc["task_id"]
12561259
return self.get_charge_density_from_task_id(task_id, inc_task_doc)
12571260

1258-
def get_download_info(self, material_ids, calc_types=None, file_patterns=None):
1261+
def get_download_info(
1262+
self,
1263+
material_ids: str | MPID | list[str | MPID],
1264+
calc_types: list[str | CalcType] | None = None,
1265+
file_patterns: list[str] | None = None,
1266+
):
12591267
"""Get a list of URLs to retrieve raw VASP output files from the NoMaD repository
12601268
Args:
1261-
material_ids (list): list of material identifiers (mp-id's)
1262-
task_types (list): list of task types to include in download (see CalcType Enum class)
1269+
material_ids (str or MPID, or list thereof): list of material identifiers (mp-id's)
1270+
calc_types (list of str or CalcType): list of calc types to include in download (see CalcType Enum class)
12631271
file_patterns (list): list of wildcard file names to include for each task
12641272
Returns:
12651273
a tuple of 1) a dictionary mapping material_ids to task_ids and
12661274
calc_types, and 2) a list of URLs to download zip archives from
12671275
NoMaD repository. Each zip archive will contain a manifest.json with
12681276
metadata info, e.g. the task/external_ids that belong to a directory.
12691277
"""
1278+
warnings.warn(
1279+
"Full downloads of raw data are being transitioned to "
1280+
"Materials Project's AWS S3 OpenData buckets. "
1281+
"These features for accessing legacy raw data via NOMAD "
1282+
"are maintained but may not be supported in the future.",
1283+
category=MPRestWarning,
1284+
stacklevel=2,
1285+
)
1286+
12701287
# task_id's correspond to NoMaD external_id's
1288+
if isinstance(material_ids, str | MPID):
1289+
material_ids = [material_ids]
1290+
12711291
calc_types = (
1272-
[t.value for t in calc_types if isinstance(t, CalcType)]
1292+
[
1293+
t.value if isinstance(t, CalcType) else CalcType(t).value
1294+
for t in calc_types
1295+
]
12731296
if calc_types
12741297
else []
12751298
)

mp_api/client/routes/materials/materials.py

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from pathlib import Path
34
from typing import TYPE_CHECKING
45

56
from emmet.core.symmetry import CrystalSystem
@@ -172,11 +173,11 @@ def search(
172173

173174
def find_structure(
174175
self,
175-
filename_or_structure,
176+
filename_or_structure: str | Path | Structure,
176177
ltol=MAPI_CLIENT_SETTINGS.LTOL,
177178
stol=MAPI_CLIENT_SETTINGS.STOL,
178179
angle_tol=MAPI_CLIENT_SETTINGS.ANGLE_TOL,
179-
allow_multiple_results=False,
180+
allow_multiple_results: bool | int = False,
180181
) -> list[str] | str:
181182
"""Finds matching structures from the Materials Project database.
182183
@@ -186,48 +187,75 @@ def find_structure(
186187
default tolerances.
187188
188189
Args:
189-
filename_or_structure: filename or Structure object
190+
filename_or_structure: filename as a str or Path, or a Structure object
190191
ltol: fractional length tolerance
191192
stol: site tolerance
192193
angle_tol: angle tolerance in degrees
193-
allow_multiple_results: changes return type for either
194-
a single material_id or list of material_ids
194+
allow_multiple_results (bool or int): changes return type for either
195+
a single material_id or list of material_ids.
196+
If a bool, returns either all matches (True) or one match at most (False).
197+
If an int, returns that many matches at most.
198+
195199
Returns:
196200
A matching material_id if one is found or list of results if allow_multiple_results
197201
is True
198202
Raises:
199203
MPRestError
200204
"""
201-
params = {"ltol": ltol, "stol": stol, "angle_tol": angle_tol, "_limit": 1}
205+
from pymatgen.analysis.structure_matcher import (
206+
ElementComparator,
207+
StructureMatcher,
208+
)
202209

203-
if isinstance(filename_or_structure, str):
210+
if (
211+
isinstance(filename_or_structure, str | Path)
212+
and Path(filename_or_structure).exists()
213+
):
204214
s = Structure.from_file(filename_or_structure)
205215
elif isinstance(filename_or_structure, Structure):
206216
s = filename_or_structure
207217
else:
208218
raise MPRestError("Provide filename or Structure object.")
209219

210-
results = self._post_resource(
211-
body=s.as_dict(),
212-
params=params,
213-
suburl="find_structure",
214-
use_document_model=False,
215-
).get("data")
216-
217-
if not results:
220+
mat_docs = self.search(
221+
formula=s.reduced_formula, fields=["material_id", "structure"]
222+
)
223+
if not mat_docs:
218224
return []
219225

220-
material_ids = validate_ids([doc["material_id"] for doc in results])
226+
if isinstance(allow_multiple_results, bool):
227+
max_matches: int = len(mat_docs) if allow_multiple_results else 1
228+
elif isinstance(allow_multiple_results, int):
229+
max_matches = allow_multiple_results
230+
else:
231+
raise MPRestError(
232+
f"`allow_multiple_results` must be a bool or int, not {type(allow_multiple_results)}"
233+
)
221234

222-
if len(material_ids) > 1: # type: ignore
223-
if not allow_multiple_results:
224-
raise ValueError(
225-
"Multiple matches found for this combination of tolerances, but "
226-
"`allow_multiple_results` set to False."
227-
)
228-
return material_ids # type: ignore
235+
matcher = StructureMatcher(
236+
ltol=ltol,
237+
stol=stol,
238+
angle_tol=angle_tol,
239+
primitive_cell=True,
240+
scale=True,
241+
attempt_supercell=False,
242+
comparator=ElementComparator(),
243+
)
229244

230-
return material_ids[0]
245+
matches: list[str] = []
246+
for doc in mat_docs:
247+
if matcher.fit(
248+
s,
249+
doc.structure if self.use_document_model else Structure.from_dict(doc["structure"]), # type: ignore
250+
):
251+
matches.append(doc.material_id.string if self.use_document_model else doc["material_id"]) # type: ignore
252+
if len(matches) >= max_matches:
253+
break
254+
255+
if not matches:
256+
return []
257+
material_ids = validate_ids(matches)
258+
return material_ids if allow_multiple_results else material_ids[0]
231259

232260
def get_blessed_entries(
233261
self,

mp_api/client/routes/molecules/bonds.py

Lines changed: 0 additions & 134 deletions
This file was deleted.

0 commit comments

Comments
 (0)