@@ -1314,6 +1314,24 @@ def _paginate_hits(self,
13141314 else :
13151315 break
13161316
def _paginate_hits_sorted(self,
                          request: Search,
                          sort: SortKey
                          ) -> Iterable[Hit]:
    """
    Wrapper around :meth:`_paginate_hits` for simple cases where every page
    is fetched with the same request, sorted by a fixed set of keys, and
    only the ``search_after`` value differs from one page to the next.

    :param request: the search to paginate. The page size
                    (``self.page_size``) and the sort order are applied
                    to it here, so callers should not set them.

    :param sort: the sort keys, passed as positional arguments to
                 ``request.sort``. NOTE(review): for ``search_after``
                 pagination the keys presumably need to yield an
                 unambiguous total ordering of the hits; confirm against
                 :meth:`_paginate_hits`.

    :return: the result of :meth:`_paginate_hits` for the sorted request
    """
    request = request.extra(size=self.page_size)
    request = request.sort(*sort)

    def request_factory(search_after: SortKey | None) -> Search:
        # The first page is requested without `search_after`, so the
        # prepared request can be used as is. Every subsequent page uses
        # the same request with only `search_after` added.
        if search_after is None:
            return request
        else:
            return request.extra(search_after=search_after)

    return self._paginate_hits(request_factory)
1334+
13171335 def _create_paged_request (self , search_after : SortKey | None ) -> Search :
13181336 pagination = Pagination (sort = 'entryId' ,
13191337 order = 'asc' ,
@@ -1986,7 +2004,6 @@ def _join_replicas(self, keys: Iterable[ReplicaKeys]) -> Iterable[Hit]:
19862004 {'terms' : {'hub_ids.keyword' : list (hub_ids )}},
19872005 {'terms' : {'entity_id.keyword' : list (replica_ids )}}
19882006 ]))
1989- request = request .extra (size = self .page_size )
19902007
19912008 # `_id` is currently the only index field that is unique to each replica
19922009 # document (and thus results in an unambiguous total ordering). However,
@@ -1998,15 +2015,8 @@ def _join_replicas(self, keys: Iterable[ReplicaKeys]) -> Iterable[Hit]:
19982015 # FIXME: ES DeprecationWarning for using _id as sort key
19992016 # https://github.com/DataBiosphere/azul/issues/7290
20002017 #
2001- request = request .sort ('entity_id.keyword' , '_id' )
2002-
2003- def request_factory (search_after : SortKey | None ) -> Search :
2004- if search_after is None :
2005- return request
2006- else :
2007- return request .extra (search_after = search_after )
2008-
2009- return self ._paginate_hits (request_factory )
2018+ sort = ('entity_id.keyword' , '_id' )
2019+ return self ._paginate_hits_sorted (request , sort )
20102020
20112021
20122022class JSONLVerbatimManifestGenerator (PagedManifestGenerator ,
0 commit comments