Skip to content

Commit 7ddd85a

Browse files
k-ibaraki and claude committed
feat: add include_row_data parameter to Excel search
Add include_row_data parameter to search_cells method to retrieve entire row data for each match in a single call, avoiding N+1 reads.

Changes:
- Add include_row_data parameter to search_cells method
- Update _scan_sheet to collect and attach row data when enabled
- Add _get_row_data helper method to extract non-null cells from a row
- Handle RuntimeError by collecting matches before accessing sheet rows
- Support both fast path (_cells) and fallback path (iter_rows)

Behavior:
- Default: False (backward compatible)
- Row data includes only non-null cells
- Same-row multiple matches get independent row_data (duplicated)
- Single-column sheets handled correctly

Related: #55

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 1883b73 commit 7ddd85a

1 file changed

Lines changed: 65 additions & 12 deletions

File tree

src/sharepoint_excel.py

Lines changed: 65 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ def search_cells(
3434
file_path: str,
3535
query: str,
3636
sheet_name: str | None = None,
37+
include_row_data: bool = False,
3738
) -> str:
3839
"""
3940
セル内容を検索して該当位置を返す
@@ -67,25 +68,35 @@ def search_cells(
6768
# sheet_name 指定がある場合はそのシートを優先して検索
6869
if sheet_name:
6970
if sheet_name in workbook.sheetnames:
70-
self._scan_sheet(workbook[sheet_name], sheet_name, query, matches)
71+
self._scan_sheet(
72+
workbook[sheet_name],
73+
sheet_name,
74+
query,
75+
matches,
76+
include_row_data,
77+
)
7178

7279
# マッチが無ければ全シート走査にフォールバック
7380
if len(matches) == 0:
7481
for sn in workbook.sheetnames:
7582
if sn == sheet_name:
7683
continue
77-
self._scan_sheet(workbook[sn], sn, query, matches)
84+
self._scan_sheet(
85+
workbook[sn], sn, query, matches, include_row_data
86+
)
7887
else:
7988
# sheet_name が存在しない場合は「指定なし」と同じ扱いで全シート検索
8089
warnings.append(
8190
f"Sheet '{sheet_name}' not found. Searching all sheets instead."
8291
)
8392
for sn in workbook.sheetnames:
84-
self._scan_sheet(workbook[sn], sn, query, matches)
93+
self._scan_sheet(
94+
workbook[sn], sn, query, matches, include_row_data
95+
)
8596
else:
8697
# 全シート検索
8798
for sn in workbook.sheetnames:
88-
self._scan_sheet(workbook[sn], sn, query, matches)
99+
self._scan_sheet(workbook[sn], sn, query, matches, include_row_data)
89100

90101
logger.info(f"Found {len(matches)} matches for query '{query}'")
91102

@@ -270,6 +281,7 @@ def _scan_sheet(
270281
sheet_name_for_result: str,
271282
query: str,
272283
matches: list[dict[str, Any]],
284+
include_row_data: bool = False,
273285
) -> None:
274286
"""
275287
シート内のセルを走査してqueryに一致するセルをmatchesに追加する
@@ -281,31 +293,72 @@ def _scan_sheet(
281293
# その場合はiter_rows()を使用するフォールバックロジックが動作します。
282294
if hasattr(sheet, "_cells"):
283295
# 実在セルのみを走査(高速)
296+
# まずマッチを収集(_cellsのイテレーション中にsheetアクセスすると辞書が変わるため)
297+
new_matches: list[dict[str, Any]] = []
284298
for cell in sheet._cells.values():
285299
if cell.value is not None:
286300
cell_value_str = str(cell.value)
287301
if query in cell_value_str:
288-
matches.append(
302+
new_matches.append(
289303
{
290304
"sheet": sheet_name_for_result,
291305
"coordinate": cell.coordinate,
292306
"value": self._serialize_value(cell.value),
307+
"_row": cell.row,
293308
}
294309
)
310+
# イテレーション完了後に行データを取得
311+
for match in new_matches:
312+
row_num = match.pop("_row")
313+
if include_row_data:
314+
match["row_data"] = self._get_row_data(sheet, row_num)
315+
matches.append(match)
295316
else:
296317
# openpyxl公開APIを使用(互換性確保)
297318
for row in sheet.iter_rows(values_only=False):
298319
for cell in row:
299320
if cell.value is not None:
300321
cell_value_str = str(cell.value)
301322
if query in cell_value_str:
302-
matches.append(
303-
{
304-
"sheet": sheet_name_for_result,
305-
"coordinate": cell.coordinate,
306-
"value": self._serialize_value(cell.value),
307-
}
308-
)
323+
match = {
324+
"sheet": sheet_name_for_result,
325+
"coordinate": cell.coordinate,
326+
"value": self._serialize_value(cell.value),
327+
}
328+
if include_row_data:
329+
match["row_data"] = [
330+
{
331+
"coordinate": c.coordinate,
332+
"value": self._serialize_value(c.value),
333+
}
334+
for c in row
335+
if c.value is not None
336+
]
337+
matches.append(match)
338+
339+
def _get_row_data(self, sheet, row_num: int) -> list[dict[str, Any]]:
340+
"""
341+
指定行の非nullセルデータをリストとして返す
342+
343+
Args:
344+
sheet: openpyxl Worksheet
345+
row_num: 行番号
346+
347+
Returns:
348+
非nullセルの [{coordinate, value}, ...] リスト
349+
"""
350+
row_cells = sheet[row_num]
351+
# 単一列シートではCellオブジェクト単体が返される場合がある
352+
if isinstance(row_cells, Cell):
353+
row_cells = (row_cells,)
354+
return [
355+
{
356+
"coordinate": c.coordinate,
357+
"value": self._serialize_value(c.value),
358+
}
359+
for c in row_cells
360+
if c.value is not None
361+
]
309362

310363
def _calculate_header_range(self, cell_range: str, frozen_rows: int) -> str | None:
311364
"""

0 commit comments

Comments
 (0)