Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8a8bc0f
first pass at adding cursor paging
Jan 9, 2026
de1e246
draft
Jan 13, 2026
34084ba
first draft at parallel partitioning
Jan 13, 2026
19929f1
code clean up
Jan 14, 2026
8c70514
move partitioning logic
Jan 14, 2026
411628f
change default
Jan 15, 2026
8f4b98e
add condition to skip first fetch when partitioning
Jan 20, 2026
a2dfb6c
flatten final return list
Jan 21, 2026
01d36fd
revert default paging method
Jan 22, 2026
c9f6397
add docs
Jan 22, 2026
ef7282d
add check for endpoint type since cursor paging does not support dele…
Jan 22, 2026
cf0b236
remove partitioning for now
Jan 26, 2026
a495ec0
decouple and refactor get_pages() into 2 smaller pagination generators
Jan 26, 2026
36157dc
default to cursor paging for `get_rows()`
Jan 26, 2026
2f0050d
fall back on reverse-offset if ods version not compatible with cursor…
Jan 27, 2026
68401f2
fall back on reverse-offset if incompatible with cursor paging
Jan 27, 2026
b3b708c
remove default step_change_version arg
Jan 27, 2026
d622074
undo get()
Jan 27, 2026
749cfc0
remove unneeded code
Jan 27, 2026
8672da0
move fallbacks to `get_rows()`
Jan 27, 2026
db06877
debug
Jan 28, 2026
493f998
reverse-offset as default
Jan 28, 2026
776c3ba
code clean up
Jan 29, 2026
b1a83ec
Minor cleanup of whitespace and logging.
jayckaiser Feb 3, 2026
aca665f
Implement get_pages instead of get_pages_cursor in composites.
jayckaiser Feb 3, 2026
1376c31
Label composite-offset pagination scheme with correct method.
jayckaiser Feb 3, 2026
f2efff8
Merge branch 'main' into feature/cursor_paging
jayckaiser Mar 24, 2026
75c42b9
Use defined logger in EdFiEndpoint instead of default.
jayckaiser Mar 24, 2026
76052ab
Minor cleanup.
jayckaiser Mar 24, 2026
8762b2c
Merge branch 'feature/cursor_paging' of https://github.com/edanalytic…
Apr 13, 2026
e6b684e
remove attribute
Apr 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 87 additions & 15 deletions edfi_api_client/edfi_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
if TYPE_CHECKING:
from edfi_api_client.edfi_client import EdFiClient

from joblib import Parallel, delayed
Comment thread
gnguyen87 marked this conversation as resolved.
Outdated
from functools import partial


class EdFiEndpoint:
"""
Expand Down Expand Up @@ -123,30 +126,30 @@ def ping(self, *, params: Optional[dict] = None, **kwargs) -> requests.Response:
return res


def get(self, limit: Optional[int] = None, *, url: Optional[str] = None, params: Optional[dict] = None, **kwargs) -> List[dict]:
    """
    This method returns the rows from a single GET request using the exact params passed by the user.

    `url` is keyword-only so that `limit` remains the only positional argument;
    existing positional calls such as `get(50)` keep their meaning.

    :param limit: if truthy, overrides the `limit` key of the request params
    :param url: optional alternative URL to request; defaults to `self.url`
    :param params: optional alternative params; defaults to `self.params`
    :param kwargs: forwarded unchanged to `self.client.session.get_response()`
    :return: the decoded JSON payload of the response
    """
    end_point = url or self.url
    logging.info(f"[Get {self.component}] Endpoint: {end_point}")

    # Override init params if passed (work on a copy so the caller's params are not mutated)
    params = EdFiParams(params or self.params).copy()
    if limit:  # Override limit if passed
        params['limit'] = limit

    logging.info(f"[Get {self.component}] Parameters: {params}")
    return self.client.session.get_response(end_point, params=params, **kwargs).json()


def get_rows(self,
*,
params: Optional[dict] = None, # Optional alternative params
page_size: int = 100,
reverse_paging: bool = True,
Comment thread
gnguyen87 marked this conversation as resolved.
step_change_version: bool = False,
change_version_step_size: int = 50000,
**kwargs
) -> Iterator[dict]:
"""
Expand All @@ -163,20 +166,22 @@ def get_rows(self,
:param max_wait:
:return:
"""
paged_result_iter = self.get_pages(

paged_result_iter = self.get_pages_cursor(
Comment thread
gnguyen87 marked this conversation as resolved.
Outdated
params=params,
page_size=page_size, reverse_paging=reverse_paging,
step_change_version=step_change_version, change_version_step_size=change_version_step_size,
page_size=page_size,
**kwargs
)

for paged_result in paged_result_iter:
yield from paged_result


def get_pages(self,
def get_pages_offset(self,
Comment thread
gnguyen87 marked this conversation as resolved.
Outdated
*,
url: Optional[str] = None,
Comment thread
gnguyen87 marked this conversation as resolved.
Outdated
params: Optional[dict] = None, # Optional alternative params
limit: Optional[int] = None,
page_size: int = 100,
reverse_paging: bool = True,
step_change_version: bool = False,
Expand All @@ -199,6 +204,11 @@ def get_pages(self,
"""
# Override init params if passed
paged_params = EdFiParams(params or self.params).copy()
end_point = url or self.url
logging.info(f"[Get {self.component}] Endpoint: {end_point}")

if limit: # Override limit if passed
Comment thread
gnguyen87 marked this conversation as resolved.
Outdated
paged_params['limit'] = limit

### Prepare pagination variables, depending on type of pagination being used
if step_change_version and reverse_paging:
Expand All @@ -210,19 +220,20 @@ def get_pages(self,
elif step_change_version:
logging.info(f"[Paged Get {self.component}] Pagination Method: Change Version Stepping")
paged_params.init_page_by_offset(page_size)
paged_params.init_page_by_change_version_step(change_version_step_size)

paged_params.init_page_by_change_version_step(change_version_step_size)
else:
logging.info(f"[Paged Get {self.component}] Pagination Method: Offset Pagination")
paged_params.init_page_by_offset(page_size)

total_count = 0
Comment thread
gnguyen87 marked this conversation as resolved.
Outdated
# Begin pagination-loop
while True:
logging.info(f"[Get {self.component}] Parameters: {paged_params}")

### GET from the API and yield the resulting JSON payload
paged_rows = self.get(params=paged_params, **kwargs)
logging.info(f"[Get {self.component}] Retrieved {len(paged_rows)} rows.")
paged_rows = self.client.session.get_response(end_point, params=paged_params, **kwargs).json()
yield paged_rows
logging.info(f"[Get {self.component}] Retrieved {len(paged_rows)} rows.")
Comment thread
gnguyen87 marked this conversation as resolved.
Outdated

### Paginate, depending on the method specified in arguments
# Reverse offset pagination is only applicable during change-version stepping.
Expand All @@ -239,7 +250,7 @@ def get_pages(self,
except StopIteration:
logging.info(f"[Paged Get {self.component}] @ Change version exceeded max. Ending pagination.")
break

else:
# If no rows are returned, end pagination.
if len(paged_rows) == 0:
Expand All @@ -259,6 +270,67 @@ def get_pages(self,
else:
logging.info(f"@ Paginating offset...")
paged_params.page_by_offset()

def get_pages_cursor(self,
    *,
    url: Optional[str] = None,
    params: Optional[dict] = None,  # Optional alternative params
    limit: Optional[int] = None,
    page_size: int = 100,
    **kwargs
) -> Iterator[List[dict]]:
    """
    Yield pages of rows using cursor (page-token) pagination.

    Falls back to `get_pages_offset()` with the same arguments when the ODS
    version is below 7.3, or when this endpoint is retrieving deletes or
    key changes.

    :param url: optional alternative URL to request; defaults to `self.url`
    :param params: optional alternative params; defaults to `self.params`
    :param limit: if truthy, overrides the `limit` key of the request params
    :param page_size: page size passed to `init_page_by_token()` for each request
    :param kwargs: forwarded unchanged to `self.client.session.get_response()`
    :return: an iterator of JSON payloads, one per page
    """
    end_point = url or self.url
    logging.info(f"[Get {self.component}] Endpoint: {end_point}")

    # Fall back to reverse-offset paging if incompatible with cursor paging
    def _fall_back_to_pages_by_offset():
        return self.get_pages_offset(
            url=url,
            params=params,
            limit=limit,
            page_size=page_size,
            **kwargs
        )

    # Check ODS version compatibility for cursor paging (compare major.minor only)
    raw_ods_version = self.client.get_ods_version()
    ods_version = tuple(map(int, raw_ods_version.split(".")[:2]))
    if ods_version < (7, 3):
        logging.warning(f"ODS {raw_ods_version} is incompatible. Cursor Paging requires v.7.3 or higher. Falling back to another paging method")
        yield from _fall_back_to_pages_by_offset()
        return

    # deletes/key_changes cannot be retrieved with cursor paging
    if self.get_deletes or self.get_key_changes:
        logging.warning(f"Cursor Paging does not support deletes/key_changes. Falling back to another paging method")
        yield from _fall_back_to_pages_by_offset()
        return

    logging.info(f"[Paged Get {self.component}] Pagination Method: Cursor Paging")

    # Override init params if passed (only needed once cursor paging is confirmed)
    paged_params = EdFiParams(params or self.params).copy()
    if limit:  # Override limit if passed
        paged_params['limit'] = limit

    # No token exists before the first response, so the first pass starts with None.
    # NOTE(review): a reviewer asked that page_size be sent on the first request as well;
    # what is actually sent is decided by init_page_by_token() -- confirm the intended behavior there.
    page_token = None

    # Begin pagination loop
    while True:
        paged_params.init_page_by_token(page_token=page_token, page_size=page_size)
        logging.info(f"[Get {self.component}] Parameters: {paged_params}")

        result = self.client.session.get_response(end_point, params=paged_params, **kwargs)
        paged_rows = result.json()
        logging.info(f"[Get {self.component}] Retrieved {len(paged_rows)} rows")
        yield paged_rows

        logging.info(f"[Paged Get {self.component}] @ Cursor paging ...")

        # The next token comes from the response headers; its absence ends pagination.
        page_token = result.headers.get("Next-Page-Token")
        if not page_token:
            logging.info(f"[Paged Get {self.component}] @ No next-page token returned. Ending pagination.")
            break
Comment thread
gnguyen87 marked this conversation as resolved.
Outdated



def get_total_count(self, *, params: Optional[dict] = None, **kwargs) -> int:
Expand Down
27 changes: 27 additions & 0 deletions edfi_api_client/edfi_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ def __init__(self,
# These parameters are only used during pagination. They must be explicitly initialized.
self.page_size = None
self.change_version_step_size = None
self.page_token = None
self.number = None
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this number attribute used?



def copy(self) -> 'EdFiParams':
Expand Down Expand Up @@ -190,3 +192,28 @@ def reverse_page_by_offset(self):

if self['offset'] < 0:
raise StopIteration

def init_page_by_token(self, page_token: str, page_size: int):
    """
    Set or clear the cursor-paging keys on these params.

    Cursor paging behavior as implemented here:
    - If page_token is None (first request), neither `pageToken` nor `pageSize` is sent.
    - If page_token is present, both `pageToken` and `pageSize` are sent.

    Both values are also recorded on the instance as `page_token` and `page_size`.

    :param page_token: token identifying the page to request, or None for the first request
    :param page_size: page size to send alongside the token
    :return:
    """
    self.page_size = page_size
    self.page_token = page_token

    if page_token is None:
        # First request: make sure no stale cursor keys are carried over.
        self.pop("pageToken", None)
        self.pop("pageSize", None)
        return

    # Keys are written in camelCase, since item assignment is not converted from snake_case.
    self["pageToken"] = page_token
    self["pageSize"] = page_size