Skip to content

Commit 07765ca

Browse files
dbernstein and claude committed
Remove _all_books_out_of_scope from Overdrive importer
The method and its call site were made obsolete by the lastUpdateTime parameter already being passed to Overdrive's API, which performs server-side filtering so only relevant books are returned in the first place.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent b6c211c commit 07765ca

2 files changed

Lines changed: 3 additions & 377 deletions

File tree

src/palace/manager/integration/license/overdrive/importer.py

Lines changed: 3 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
from dataclasses import dataclass
55
from typing import Any
66

7-
import dateutil
87
from sqlalchemy.orm import Session
98

109
from palace.util.exceptions import PalaceValueError
@@ -185,36 +184,6 @@ def _process_book(
185184

186185
return identifier, changed
187186

188-
def _all_books_out_of_scope(
189-
self,
190-
modified_since: datetime.datetime,
191-
book_data: list[dict[str, Any]],
192-
) -> bool:
193-
"""Check if all books in the book_data are out of scope in terms of the date they were added.
194-
195-
This method is used to determine if we should continue to fetch the next page of books.
196-
Overdrive does not provide a way to retrieve books that were added or modified since a given date.
197-
They do however give us the "date_added" value for each book which we can use to determine
198-
if the book was added before the modified_since date.
199-
200-
:param modified_since: The datetime to check if the books are out of scope.
201-
:param book_data: The book data to check if the books are out of scope.
202-
:return: True if all books are out of scope, False otherwise.
203-
"""
204-
out_of_scope_count = 0
205-
206-
for book in book_data:
207-
date_added = book.get("date_added", None)
208-
if not date_added:
209-
# this should not happen, but if it does, we'll assume the book is not out of scope.
210-
continue
211-
212-
date_added = dateutil.parser.parse(date_added)
213-
if date_added < modified_since:
214-
out_of_scope_count += 1
215-
216-
return out_of_scope_count == len(book_data)
217-
218187
def import_collection(
219188
self,
220189
*,
@@ -234,12 +203,7 @@ def import_collection(
234203
- For advantage collections (with parent_identifier_set): Fetches metadata lazily,
235204
skipping books that are already in the parent collection
236205
237-
2. **Out-of-Scope Optimization**:
238-
- If all books in the current page were added before modified_since and there were no changes detected,
239-
stops pagination early to avoid processing old data
240-
- Can be disabled when modified_since is None.
241-
242-
3. **Change Detection**:
206+
2. **Change Detection**:
243207
- Only applies bibliographic updates if metadata has changed
244208
- Always checks circulation data as availability changes frequently and applies changes only if changed.
245209
@@ -250,7 +214,7 @@ def import_collection(
250214
starting from modified_since
251215
:param page_size: Number of items to fetch per page
252216
:return: FeedImportResult containing current_page (the endpoint that was processed),
253-
next_page (the next endpoint to process, None if done or all books out of scope),
217+
next_page (the next endpoint to process, None if done),
254218
and processed_count (number of books processed in this call)
255219
256220
.. note::
@@ -288,7 +252,6 @@ def import_collection(
288252
)
289253

290254
timestamp = self.get_timestamp()
291-
changed_books_count = 0
292255
# Fetch metadata upfront if no parent identifier set is provided. Practically speaking,
293256
# if there is no parent identifier set, then the collection being imported is a
294257
# main rather than an advantage collection. We always fetch availability because we do not gain
@@ -302,11 +265,9 @@ def import_collection(
302265
)
303266
)
304267
for book in book_data:
305-
identifier, changed = self._process_book(
268+
identifier, _ = self._process_book(
306269
book, fetch_metadata, policy, apply_bibliographic, apply_circulation
307270
)
308-
if changed:
309-
changed_books_count += 1
310271
identifiers.append(identifier)
311272

312273
achievements = [f"Total items queued for import: {len(identifiers)}."]
@@ -322,15 +283,6 @@ def import_collection(
322283
f"Finished import of {len(identifiers)} for collection {self._collection.name} (id={self._collection.id}). "
323284
f"{' '.join(achievements)}"
324285
)
325-
# if we are not in import all mode and all books are both out of scope and no books were changed, we can assume that
326-
# were are done importing and therefore we don't need to fetch the next page.
327-
if (
328-
modified_since is not None
329-
and changed_books_count == 0
330-
and self._all_books_out_of_scope(modified_since, book_data)
331-
):
332-
next_endpoint = None
333-
334286
return FeedImportResult(
335287
next_page=next_endpoint,
336288
current_page=endpoint,

0 commit comments

Comments
 (0)