Skip to content

Commit 00a21ec

Browse files
k-ibaraki and claude committed
fix(excel): Fix freeze_panes scroll position bug and update config defaults
## Bug Fix
- Replace _parse_freeze_panes with _get_frozen_panes to use pane.ySplit/xSplit
- Previously used sheet.freeze_panes (=pane.topLeftCell) which reflects scroll position
- This caused incorrect frozen_rows detection when an Excel file was saved after scrolling
- Add _format_freeze_panes helper to generate the freeze_panes string representation

## Tests
- Add test for scrolled position scenario (topLeftCell changed but ySplit unchanged)
- Add test for split pane (state="split") to verify it's correctly ignored
- All 32 tests pass

## Configuration Updates
- Add SHAREPOINT_EXCEL_MAX_FROZEN_ROWS to .env.example (default: 100)
- Add SHAREPOINT_EXCEL_MAX_DATA_ROWS to .env.example (default: 10000)
- Reduce SHAREPOINT_EXCEL_MAX_DATA_COLS default from 10000 to 2000
- Update setup.md and setup_ja.md with Excel processing limits documentation

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 5dc73ef commit 00a21ec

6 files changed

Lines changed: 139 additions & 25 deletions

File tree

.env.example

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,22 @@ SHAREPOINT_DEFAULT_MAX_RESULTS=20
8888
# Allowed file extensions / 許可するファイル拡張子
8989
SHAREPOINT_ALLOWED_FILE_EXTENSIONS=pdf,docx,xlsx,pptx,txt,md
9090

91+
# ============================================================================
92+
# Excel Processing Limits (Optional) / Excel処理制限(オプション)
93+
# ============================================================================
94+
95+
# Maximum frozen rows (header rows limit for DoS protection)
96+
# 最大固定行数(DoS対策のヘッダー行上限)
97+
# SHAREPOINT_EXCEL_MAX_FROZEN_ROWS=100
98+
99+
# Maximum data rows (prevent processing extremely large Excel files)
100+
# 最大データ行数(巨大なExcelファイル処理を防止)
101+
# SHAREPOINT_EXCEL_MAX_DATA_ROWS=10000
102+
103+
# Maximum data columns (prevent processing extremely wide Excel files)
104+
# 最大データ列数(極端に幅広いExcelファイル処理を防止)
105+
# SHAREPOINT_EXCEL_MAX_DATA_COLS=2000
106+
91107
# ============================================================================
92108
# Tool Descriptions (Optional) / ツール説明文(オプション)
93109
# ============================================================================

docs/setup.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ SHAREPOINT_SITE_NAME=yoursite
4646
SHAREPOINT_DEFAULT_MAX_RESULTS=20
4747
SHAREPOINT_ALLOWED_FILE_EXTENSIONS=pdf,docx,xlsx,pptx,txt,md
4848

49+
# Excel processing limits (optional)
50+
# SHAREPOINT_EXCEL_MAX_FROZEN_ROWS=100 # Maximum frozen rows (DoS protection)
51+
# SHAREPOINT_EXCEL_MAX_DATA_ROWS=10000 # Maximum data rows
52+
# SHAREPOINT_EXCEL_MAX_DATA_COLS=2000 # Maximum data columns
53+
4954
# Tool description customization (optional)
5055
# SHAREPOINT_SEARCH_TOOL_DESCRIPTION=Search internal documents
5156
# SHAREPOINT_DOWNLOAD_TOOL_DESCRIPTION=Download files from search results

docs/setup_ja.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ SHAREPOINT_SITE_NAME=yoursite
4646
SHAREPOINT_DEFAULT_MAX_RESULTS=20
4747
SHAREPOINT_ALLOWED_FILE_EXTENSIONS=pdf,docx,xlsx,pptx,txt,md
4848

49+
# Excel処理制限(オプション)
50+
# SHAREPOINT_EXCEL_MAX_FROZEN_ROWS=100 # 最大固定行数(DoS対策)
51+
# SHAREPOINT_EXCEL_MAX_DATA_ROWS=10000 # 最大データ行数
52+
# SHAREPOINT_EXCEL_MAX_DATA_COLS=2000 # 最大データ列数
53+
4954
# ツール説明文のカスタマイズ(オプション)
5055
# SHAREPOINT_SEARCH_TOOL_DESCRIPTION=社内文書を検索します
5156
# SHAREPOINT_DOWNLOAD_TOOL_DESCRIPTION=検索結果からファイルをダウンロードします

src/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def __init__(self):
6363
os.getenv("SHAREPOINT_EXCEL_MAX_DATA_ROWS", "10000")
6464
)
6565
self.excel_max_data_cols = int(
66-
os.getenv("SHAREPOINT_EXCEL_MAX_DATA_COLS", "10000")
66+
os.getenv("SHAREPOINT_EXCEL_MAX_DATA_COLS", "2000")
6767
)
6868

6969
# ツール説明文のカスタマイズ

src/sharepoint_excel.py

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def _parse_sheet(
193193
frozen_rows = 0
194194
frozen_cols = 0
195195
if include_header:
196-
frozen_rows, frozen_cols = self._parse_freeze_panes(sheet.freeze_panes)
196+
frozen_rows, frozen_cols = self._get_frozen_panes(sheet)
197197

198198
# frozen_rows検証(DoS対策)
199199
if frozen_rows > config.excel_max_frozen_rows:
@@ -202,8 +202,10 @@ def _parse_sheet(
202202
f"悪意のあるファイルまたは処理には大きすぎるファイルです。"
203203
)
204204

205-
if sheet.freeze_panes:
206-
sheet_data["freeze_panes"] = sheet.freeze_panes
205+
if frozen_rows > 0 or frozen_cols > 0:
206+
sheet_data["freeze_panes"] = self._format_freeze_panes(
207+
frozen_rows, frozen_cols
208+
)
207209
sheet_data["frozen_rows"] = frozen_rows
208210
sheet_data["frozen_cols"] = frozen_cols
209211

@@ -297,7 +299,9 @@ def _parse_sheet(
297299

298300
if rows_to_process:
299301
all_rows.extend(
300-
self._parse_rows(rows_to_process, include_formatting, merged_cell_map)
302+
self._parse_rows(
303+
rows_to_process, include_formatting, merged_cell_map
304+
)
301305
)
302306

303307
# レスポンス形式の分岐
@@ -481,38 +485,46 @@ def _calculate_range_size(self, range_str: str) -> tuple[int, int]:
481485
logger.warning(f"Failed to calculate range size '{range_str}': {e}")
482486
return (0, 0)
483487

484-
def _parse_freeze_panes(self, freeze_panes: str | None) -> tuple[int, int]:
488+
def _get_frozen_panes(self, sheet) -> tuple[int, int]:
485489
"""
486-
freeze_panes文字列を解析して固定行数・列数を返す
490+
シートのpane情報から固定行数・列数を返す(ySplit/xSplit使用)
491+
492+
sheet.freeze_panes(= pane.topLeftCell)はスクロール位置に依存するため、
493+
正確な固定行数・列数を得るには pane.ySplit / pane.xSplit を直接参照する。
487494
488495
Args:
489-
freeze_panes: freeze_panes文字列(例: "B2", "A2", "B1", None)
496+
sheet: openpyxl Worksheet
490497
491498
Returns:
492499
(frozen_rows, frozen_cols)のタプル
493-
例: "B2" → (1, 1)(行1と列Aが固定)
494-
"A2" → (1, 0)(行1のみ固定)
495-
"B1" → (0, 1)(列Aのみ固定)
496-
None → (0, 0)(固定なし)
497500
"""
498-
if not freeze_panes:
499-
return (0, 0)
500-
501501
try:
502-
# "B2" → ("B", 2)
503-
col_letter, row = coordinate_from_string(freeze_panes)
504-
# "B" → 2
505-
col_index = column_index_from_string(col_letter)
506-
507-
# freeze_panes="B2"の場合、行2より前(行1)と列B(2列目)より前(列A)が固定
508-
frozen_rows = row - 1
509-
frozen_cols = col_index - 1
510-
502+
pane = sheet.sheet_view.pane
503+
if pane is None:
504+
return (0, 0)
505+
if pane.state not in ("frozen", "frozenSplit"):
506+
return (0, 0)
507+
frozen_rows = int(pane.ySplit) if pane.ySplit else 0
508+
frozen_cols = int(pane.xSplit) if pane.xSplit else 0
511509
return (frozen_rows, frozen_cols)
512510
except Exception as e:
513-
logger.warning(f"Failed to parse freeze_panes '{freeze_panes}': {e}")
511+
logger.warning(f"Failed to get frozen panes info: {e}")
514512
return (0, 0)
515513

514+
def _format_freeze_panes(self, frozen_rows: int, frozen_cols: int) -> str:
515+
"""
516+
固定行数・列数からfreeze_panes文字列表現を生成
517+
518+
Args:
519+
frozen_rows: 固定行数
520+
frozen_cols: 固定列数
521+
522+
Returns:
523+
freeze_panes文字列表現(例: "B4")
524+
"""
525+
col_letter = get_column_letter(frozen_cols + 1)
526+
return f"{col_letter}{frozen_rows + 1}"
527+
516528
def _expand_range_with_headers(
517529
self, cell_range: str, frozen_rows: int
518530
) -> tuple[str | None, str]:

tests/test_sharepoint_excel.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,3 +732,79 @@ def test_parse_with_metadata_only_and_cell_range(self):
732732

733733
# データ行は空リスト(metadata_onlyのため)
734734
assert sheet["data_rows"] == []
735+
736+
def test_freeze_panes_scrolled_position_does_not_affect_frozen_rows(self):
737+
"""スクロール後に保存されたファイルでfrozen_rowsが正しく取得されるテスト
738+
739+
Excel上で3行固定して行450付近にスクロールして保存すると、
740+
pane.topLeftCell="A450"になるが、pane.ySplit=3は不変。
741+
旧実装ではsheet.freeze_panes(=topLeftCell)を解析していたため
742+
frozen_rows=449と誤判定していたバグを検証する。
743+
"""
744+
wb = Workbook()
745+
ws = wb.active
746+
ws.title = "ScrolledSheet"
747+
748+
# ヘッダー3行 + データ行
749+
for row in range(1, 11):
750+
ws.cell(row=row, column=1, value=f"Row{row}")
751+
752+
# 3行固定を設定
753+
ws.freeze_panes = "A4"
754+
755+
# スクロール位置を変更(pane.topLeftCellを直接操作)
756+
pane = ws.sheet_view.pane
757+
pane.topLeftCell = "A450"
758+
759+
excel_bytes = BytesIO()
760+
wb.save(excel_bytes)
761+
excel_bytes.seek(0)
762+
763+
self.mock_download_client.download_file.return_value = excel_bytes.getvalue()
764+
765+
parser = SharePointExcelParser(self.mock_download_client)
766+
result_json = parser.parse_to_json("/test/scrolled.xlsx", include_header=True)
767+
768+
result = json.loads(result_json)
769+
sheet = result["sheets"][0]
770+
771+
# pane.ySplit=3なので、frozen_rowsは3であるべき(449ではない)
772+
assert sheet["frozen_rows"] == 3
773+
assert sheet["frozen_cols"] == 0
774+
assert sheet["freeze_panes"] == "A4"
775+
776+
# ヘッダー行は3行
777+
assert len(sheet["header_rows"]) == 3
778+
assert sheet["header_rows"][0][0]["value"] == "Row1"
779+
assert sheet["header_rows"][2][0]["value"] == "Row3"
780+
781+
def test_split_pane_is_ignored(self):
782+
"""split pane(state="split")は固定行として認識されないテスト"""
783+
wb = Workbook()
784+
ws = wb.active
785+
ws.title = "SplitSheet"
786+
787+
ws["A1"] = "Header"
788+
ws["A2"] = "Data"
789+
790+
# split paneを設定(frozenではなくsplit)
791+
from openpyxl.worksheet.views import Pane
792+
793+
ws.sheet_view.pane = Pane(ySplit=3, xSplit=0, state="split")
794+
795+
excel_bytes = BytesIO()
796+
wb.save(excel_bytes)
797+
excel_bytes.seek(0)
798+
799+
self.mock_download_client.download_file.return_value = excel_bytes.getvalue()
800+
801+
parser = SharePointExcelParser(self.mock_download_client)
802+
result_json = parser.parse_to_json("/test/split.xlsx", include_header=True)
803+
804+
result = json.loads(result_json)
805+
sheet = result["sheets"][0]
806+
807+
# split paneは固定行として認識されない
808+
assert sheet["frozen_rows"] == 0
809+
assert sheet["frozen_cols"] == 0
810+
assert "freeze_panes" not in sheet

0 commit comments

Comments
 (0)