44from dataclasses import dataclass
55from typing import Any
66
7- import dateutil
87from sqlalchemy .orm import Session
98
109from palace .util .exceptions import PalaceValueError
@@ -185,36 +184,6 @@ def _process_book(
185184
186185 return identifier , changed
187186
188- def _all_books_out_of_scope (
189- self ,
190- modified_since : datetime .datetime ,
191- book_data : list [dict [str , Any ]],
192- ) -> bool :
193- """Check if all books in the book_data are out of scope in terms of the date they were added.
194-
195- This method is used to determine if we should continue to fetch the next page of books.
196- Overdrive does not provide a way to retrieve books that were added or modified since a given date.
197- They do however give us the "date_added" value for each book which we can use to determine
198- if the book was added before the modified_since date.
199-
200- :param modified_since: The datetime to check if the books are out of scope.
201- :param book_data: The book data to check if the books are out of scope.
202- :return: True if all books are out of scope, False otherwise.
203- """
204- out_of_scope_count = 0
205-
206- for book in book_data :
207- date_added = book .get ("date_added" , None )
208- if not date_added :
209- # this should not happen, but if it does, we'll assume the book is not out of scope.
210- continue
211-
212- date_added = dateutil .parser .parse (date_added )
213- if date_added < modified_since :
214- out_of_scope_count += 1
215-
216- return out_of_scope_count == len (book_data )
217-
218187 def import_collection (
219188 self ,
220189 * ,
@@ -234,12 +203,7 @@ def import_collection(
234203 - For advantage collections (with parent_identifier_set): Fetches metadata lazily,
235204 skipping books that are already in the parent collection
236205
237- 2. **Out-of-Scope Optimization**:
238- - If all books in the current page were added before modified_since and there were no changes detected,
239- stops pagination early to avoid processing old data
240- - Not applied when modified_since is None (i.e. when importing everything).
241-
242- 3. **Change Detection**:
206+ 2. **Change Detection**:
243207 - Only applies bibliographic updates if metadata has changed
244208 - Always checks circulation data as availability changes frequently and applies changes only if changed.
245209
@@ -250,7 +214,7 @@ def import_collection(
250214 starting from modified_since
251215 :param page_size: Number of items to fetch per page
252216 :return: FeedImportResult containing current_page (the endpoint that was processed),
253- next_page (the next endpoint to process, None if done or all books out of scope ),
217+ next_page (the next endpoint to process, None if done),
254218 and processed_count (number of books processed in this call)
255219
256220 .. note::
@@ -288,7 +252,6 @@ def import_collection(
288252 )
289253
290254 timestamp = self .get_timestamp ()
291- changed_books_count = 0
292255 # Fetch metadata upfront if no parent identifier set is provided. Practically speaking,
293256 # if there is no parent identifier set, then the collection being imported is a
294257 # main rather than an advantage collection. We always fetch availability because we do not gain
@@ -302,11 +265,9 @@ def import_collection(
302265 )
303266 )
304267 for book in book_data :
305- identifier , changed = self ._process_book (
268+ identifier , _ = self ._process_book (
306269 book , fetch_metadata , policy , apply_bibliographic , apply_circulation
307270 )
308- if changed :
309- changed_books_count += 1
310271 identifiers .append (identifier )
311272
312273 achievements = [f"Total items queued for import: { len (identifiers )} ." ]
@@ -322,15 +283,6 @@ def import_collection(
322283 f"Finished import of { len (identifiers )} for collection { self ._collection .name } (id={ self ._collection .id } ). "
323284 f"{ ' ' .join (achievements )} "
324285 )
325- # if we are not in import all mode and all books are both out of scope and no books were changed, we can assume that
326- # we are done importing and therefore we don't need to fetch the next page.
327- if (
328- modified_since is not None
329- and changed_books_count == 0
330- and self ._all_books_out_of_scope (modified_since , book_data )
331- ):
332- next_endpoint = None
333-
334286 return FeedImportResult (
335287 next_page = next_endpoint ,
336288 current_page = endpoint ,
0 commit comments