Skip to content

Commit e4b652c

Browse files
committed
API: Split list_entity_type_eids in two.
- Added endpoint to get only documents. - Added endpoint to get only count. - Deprecated original endpoint.
1 parent 2722ff6 commit e4b652c

3 files changed

Lines changed: 184 additions & 29 deletions

File tree

dp3/api/internal/entity_response_models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ class EntityEidList(BaseModel):
5252
data: EntityEidSnapshots
5353

5454

55+
class EntityEidCount(BaseModel):
    """Total count of documents available under specified filter."""

    # Number of documents matching the filter supplied with the request.
    total_count: int
59+
60+
5561
class EntityEidData(BaseModel):
5662
"""Data of entity eid
5763

dp3/api/routers/entity.py

Lines changed: 93 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from dp3.api.internal.entity_response_models import (
99
EntityEidAttrValue,
1010
EntityEidAttrValueOrHistory,
11+
EntityEidCount,
1112
EntityEidData,
1213
EntityEidList,
1314
EntityEidMasterRecord,
@@ -76,8 +77,29 @@ def get_eid_snapshots_handler(
7677
router = APIRouter(dependencies=[Depends(check_etype)])
7778

7879

80+
def _validate_snapshot_filters(fulltext_filters, generic_filter):
81+
if not fulltext_filters:
82+
fulltext_filters = {}
83+
if not isinstance(fulltext_filters, dict):
84+
raise HTTPException(status_code=400, detail="Fulltext filter is invalid")
85+
86+
if not generic_filter:
87+
generic_filter = {}
88+
if not isinstance(generic_filter, dict):
89+
raise HTTPException(status_code=400, detail="Generic filter is invalid")
90+
91+
for attr in fulltext_filters:
92+
ftr = fulltext_filters[attr]
93+
if not isinstance(ftr, str):
94+
raise HTTPException(status_code=400, detail=f"Filter '{ftr}' is not string")
95+
96+
return fulltext_filters, generic_filter
97+
98+
7999
@router.get(
80-
"/{etype}", responses={400: {"description": "Query can't be processed", "model": ErrorResponse}}
100+
"/{etype}",
101+
responses={400: {"description": "Query can't be processed", "model": ErrorResponse}},
102+
deprecated=True,
81103
)
82104
async def list_entity_type_eids(
83105
etype: str,
@@ -88,7 +110,47 @@ async def list_entity_type_eids(
88110
) -> EntityEidList:
89111
"""List latest snapshots of all `id`s present in database under `etype`.
90112
113+
Deprecated in favor of `/entity/{etype}/get` and `/entity/{etype}/count` endpoints,
114+
which provide more flexibility and better performance.
115+
116+
See `/entity/{etype}/get` for more information.
117+
"""
118+
fulltext_filters, generic_filter = _validate_snapshot_filters(fulltext_filters, generic_filter)
119+
120+
try:
121+
cursor, total_count = DB.snapshots.get_latest(etype, fulltext_filters, generic_filter)
122+
cursor_page = cursor.skip(skip).limit(limit)
123+
except DatabaseError as e:
124+
raise HTTPException(status_code=400, detail=str(e)) from e
125+
126+
time_created = None
127+
128+
# Remove _id field
129+
result = [r["last"] for r in cursor_page]
130+
for r in result:
131+
time_created = r["_time_created"]
132+
del r["_time_created"]
133+
134+
return EntityEidList(
135+
time_created=time_created, count=len(result), total_count=total_count, data=result
136+
)
137+
138+
139+
@router.get(
140+
"/{etype}/get",
141+
responses={400: {"description": "Query can't be processed", "model": ErrorResponse}},
142+
)
143+
async def get_entity_type_eids(
144+
etype: str,
145+
fulltext_filters: Json = None,
146+
generic_filter: Json = None,
147+
skip: NonNegativeInt = 0,
148+
limit: NonNegativeInt = 20,
149+
) -> EntityEidList:
150+
"""List latest snapshots of all `id`s present in database under `etype`.
151+
91152
Contains only latest snapshot.
153+
The `total_count` returned is always 0, use `/entity/{etype}/count` to get total count.
92154
93155
Uses pagination.
94156
Setting `limit` to 0 is interpreted as no limit (return all results).
@@ -164,23 +226,10 @@ async def list_entity_type_eids(
164226
165227
Generic and fulltext filters are merged - fulltext overrides conflicting keys.
166228
"""
167-
if not fulltext_filters:
168-
fulltext_filters = {}
169-
if not isinstance(fulltext_filters, dict):
170-
raise HTTPException(status_code=400, detail="Fulltext filter is invalid")
171-
172-
if not generic_filter:
173-
generic_filter = {}
174-
if not isinstance(generic_filter, dict):
175-
raise HTTPException(status_code=400, detail="Generic filter is invalid")
176-
177-
for attr in fulltext_filters:
178-
ftr = fulltext_filters[attr]
179-
if not isinstance(ftr, str):
180-
raise HTTPException(status_code=400, detail=f"Filter '{ftr}' is not string")
229+
fulltext_filters, generic_filter = _validate_snapshot_filters(fulltext_filters, generic_filter)
181230

182231
try:
183-
cursor, total_count = DB.snapshots.get_latest(etype, fulltext_filters, generic_filter)
232+
cursor = DB.snapshots.find_latest(etype, fulltext_filters, generic_filter)
184233
cursor_page = cursor.skip(skip).limit(limit)
185234
except DatabaseError as e:
186235
raise HTTPException(status_code=400, detail=str(e)) from e
@@ -193,9 +242,34 @@ async def list_entity_type_eids(
193242
time_created = r["_time_created"]
194243
del r["_time_created"]
195244

196-
return EntityEidList(
197-
time_created=time_created, count=len(result), total_count=total_count, data=result
198-
)
245+
return EntityEidList(time_created=time_created, count=len(result), total_count=0, data=result)
246+
247+
248+
@router.get(
    "/{etype}/count",
    responses={400: {"description": "Query can't be processed", "model": ErrorResponse}},
)
async def count_entity_type_eids(
    etype: str,
    fulltext_filters: Json = None,
    generic_filter: Json = None,
) -> EntityEidCount:
    """Count latest snapshots of all `id`s present in database under `etype`.

    Returns only count of documents matching `generic_filter` and `fulltext_filters`,
    see `/entity/{etype}/get` documentation for details.

    Note that responses from this endpoint may take much longer than `/entity/{etype}/get`
    for large datasets.
    """
    # Normalize both filters to dicts; malformed filters are rejected with HTTP 400.
    validated = _validate_snapshot_filters(fulltext_filters, generic_filter)
    fulltext_filters, generic_filter = validated

    try:
        matching = DB.snapshots.count_latest(etype, fulltext_filters, generic_filter)
    except DatabaseError as err:
        # A database error is reported to the client as a bad request.
        raise HTTPException(status_code=400, detail=str(err)) from err
    else:
        return EntityEidCount(total_count=matching)
199273

200274

201275
@router.get("/{etype}/{eid}")

dp3/database/snapshots.py

Lines changed: 85 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -185,13 +185,59 @@ def get_latest(
185185
May raise `SnapshotCollectionError` if query is invalid.
186186
"""
187187
snapshot_col = self._col()
188+
query = self._prepare_latest_query(fulltext_filters or {}, generic_filter or {})
188189

189-
if not fulltext_filters:
190-
fulltext_filters = {}
190+
try:
191+
return snapshot_col.find(query, {"last": 1}).sort(
192+
[("_id", pymongo.ASCENDING)]
193+
), snapshot_col.count_documents(query)
194+
except OperationFailure as e:
195+
raise SnapshotCollectionError(f"Query is invalid: {e}") from e
196+
197+
def find_latest(
    self,
    fulltext_filters: Optional[dict[str, str]] = None,
    generic_filter: Optional[dict[str, Any]] = None,
) -> Cursor:
    """Find latest snapshots of given `etype`, without counting them.

    See [`get_latest`][dp3.database.snapshots.SnapshotCollectionContainer.get_latest]
    for more information on the filters.

    Returns a cursor over the documents matching `generic_filter` and
    `fulltext_filters`, projected to the `last` field and sorted by `_id` ascending.

    Raises `SnapshotCollectionError` when the database reports an `OperationFailure`.
    """
    # Missing (or empty) filters are passed on as empty dicts.
    ft_filters = fulltext_filters if fulltext_filters else {}
    gen_filter = generic_filter if generic_filter else {}
    query = self._prepare_latest_query(ft_filters, gen_filter)

    try:
        matching = self._col().find(query, {"last": 1})
        return matching.sort([("_id", pymongo.ASCENDING)])
    except OperationFailure as err:
        raise SnapshotCollectionError(f"Query is invalid: {err}") from err
191215

192-
if not generic_filter:
193-
generic_filter = {}
216+
def count_latest(
    self,
    fulltext_filters: Optional[dict[str, str]] = None,
    generic_filter: Optional[dict[str, Any]] = None,
) -> int:
    """Count latest snapshots of given `etype`.

    See [`get_latest`][dp3.database.snapshots.SnapshotCollectionContainer.get_latest]
    for more information on the filters.

    Returns only the number of documents matching `generic_filter` and
    `fulltext_filters`.

    Unlike `find_latest`, this counts every matching document instead of returning
    a cursor, so it may take much longer on larger databases.

    Raises `SnapshotCollectionError` when the database reports an `OperationFailure`.
    """
    # Missing (or empty) filters are passed on as empty dicts.
    ft_filters = fulltext_filters if fulltext_filters else {}
    gen_filter = generic_filter if generic_filter else {}
    query = self._prepare_latest_query(ft_filters, gen_filter)

    try:
        collection = self._col()
        return collection.count_documents(query)
    except OperationFailure as err:
        raise SnapshotCollectionError(f"Query is invalid: {err}") from err
194236

237+
def _prepare_latest_query(
238+
self, fulltext_filters: dict[str, str], generic_filter: dict[str, Any]
239+
):
240+
"""Prepare query for get_latest method."""
195241
# Create base of query
196242
try:
197243
query = search_and_replace(generic_filter)
@@ -222,12 +268,7 @@ def get_latest(
222268
else:
223269
query["last." + attr] = fulltext_filter
224270

225-
try:
226-
return snapshot_col.find(query, {"last": 1}).sort(
227-
[("_id", pymongo.ASCENDING)]
228-
), snapshot_col.count_documents(query)
229-
except OperationFailure as e:
230-
raise SnapshotCollectionError(f"Query is invalid: {e}") from e
271+
return query
231272

232273
def get_by_eid(
233274
self, eid: AnyEidT, t1: Optional[datetime] = None, t2: Optional[datetime] = None
@@ -778,6 +819,40 @@ def get_latest(
778819
"""
779820
return self[entity_type].get_latest(fulltext_filters, generic_filter)
780821

822+
def find_latest(
    self,
    entity_type: str,
    fulltext_filters: Optional[dict[str, str]] = None,
    generic_filter: Optional[dict[str, Any]] = None,
) -> Cursor:
    """Find latest snapshots of given `etype`, without counting them.

    See [`get_latest`][dp3.database.snapshots.SnapshotCollectionContainer.get_latest]
    for more information.

    Delegates to `find_latest` of the object stored under `entity_type` and
    returns its result unchanged.
    """
    typed_snapshots = self[entity_type]
    return typed_snapshots.find_latest(fulltext_filters, generic_filter)
837+
838+
def count_latest(
    self,
    entity_type: str,
    fulltext_filters: Optional[dict[str, str]] = None,
    generic_filter: Optional[dict[str, Any]] = None,
) -> int:
    """Count latest snapshots of given `etype`.

    See [`get_latest`][dp3.database.snapshots.SnapshotCollectionContainer.get_latest]
    for more information.

    Delegates to `count_latest` of the object stored under `entity_type` and
    returns its result unchanged, i.e. only the count of documents matching
    `generic_filter` and `fulltext_filters`.

    Note that counting may take much longer than `find_latest` on larger databases,
    as it counts all matching documents instead of returning just the first few.
    """
    typed_snapshots = self[entity_type]
    return typed_snapshots.count_latest(fulltext_filters, generic_filter)
855+
781856
def get_by_eid(
782857
self,
783858
entity_type: str,

0 commit comments

Comments
 (0)