Skip to content

Commit 7780184

Browse files
authored
Merge pull request #40 from ncdcdev/perf/eliminate-duplicate-range-normalization
perf: 重複計算の削減とコードのシンプル化
2 parents 7ad8487 + 567c902 commit 7780184

4 files changed

Lines changed: 235 additions & 21 deletions

File tree

CLAUDE.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,32 @@ def some_function():
6767
return something()
6868
```
6969

70+
## Design Principles
71+
72+
### MCP Server Design Philosophy
73+
74+
**Simplicity over Complexity**
75+
- Prefer simple, explicit APIs over "smart" automatic conversions
76+
- Trust LLM's ability to learn from clear error messages
77+
- Avoid over-engineering: features are easy to add but hard to remove
78+
79+
**YAGNI (You Aren't Gonna Need It)**
80+
- Only implement features when they are clearly needed
81+
- Before adding a feature, ask: "Can LLM handle this on its own?"
82+
- Consider long-term maintenance cost vs. short-term convenience
83+
84+
**Feature Addition Checklist**
85+
- [ ] Is this feature truly necessary? (Can't LLM adapt?)
86+
- [ ] Is there a simpler alternative?
87+
- [ ] Can this be removed in the future without breaking compatibility?
88+
- [ ] Is the maintenance cost acceptable?
89+
- [ ] Does this add significant value to justify the complexity?
90+
91+
**Context Efficiency**
92+
- Keep tool descriptions concise (they're included in every LLM call)
93+
- Prioritize essential information over exhaustive documentation
94+
- Use external docs (README) for detailed explanations
95+
7096
### Project-Specific Guidelines
7197

7298
#### MCP Development

src/server.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ def sharepoint_excel(
463463
query: 検索キーワード(指定すると検索モード)
464464
sheet: シート名(特定シートのみ取得)
465465
cell_range: セル範囲(例: "A1:D10")
466+
- 推奨形式: "A1:D10"(開始セル:終了セル)
467+
- 列のみ: "A:D" も可(自動的に行範囲が追加されます)
468+
- ⚠️ 単一列/行の部分範囲は開始側のみ 1/A に自動拡張されます
469+
例: "J50:J100" → "J1:J100"(開始行が 1 行目に拡張されます)
466470
ctx: FastMCP context (injected automatically)
467471
468472
Returns:

src/sharepoint_excel.py

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -329,19 +329,9 @@ def _parse_sheet(
329329
sheet_data["frozen_rows"] = frozen_rows
330330
sheet_data["frozen_cols"] = frozen_cols
331331

332-
# マージセル情報をキャッシュ(パフォーマンス最適化)
333-
# 結合セルの場合はmerged_rangesを渡す
334-
merged_cell_map, merged_anchor_value_map, merged_ranges = (
335-
self._build_merged_cell_cache(sheet, cell_range)
336-
)
337-
338-
# ここは「結合セルがある時だけ」返す
339-
if merged_ranges:
340-
sheet_data["merged_ranges"] = merged_ranges
341-
342-
# セル範囲の拡張とデータサイズ検証
343-
all_rows = []
344-
332+
# セル範囲の正規化・拡張(cell_rangeがある場合)
333+
# マージセル情報のキャッシュに使用するため、先に計算する
334+
effective_range_for_merge = None
345335
if cell_range:
346336
sheet_data["requested_range"] = cell_range
347337
effective_range = self._normalize_column_range(cell_range, sheet)
@@ -363,7 +353,24 @@ def _parse_sheet(
363353
sheet.title,
364354
)
365355
sheet_data["effective_range"] = effective_range
356+
effective_range_for_merge = effective_range
366357

358+
# マージセル情報をキャッシュ(パフォーマンス最適化)
359+
# 計算済みのeffective_range(effective_range_for_merge)を渡してキャッシュを構築し、
360+
# 戻り値としてmerged_ranges(結合セル範囲の一覧)を取得することで重複計算を回避
361+
merged_cell_map, merged_anchor_value_map, merged_ranges = (
362+
self._build_merged_cell_cache(sheet, effective_range_for_merge)
363+
)
364+
365+
# ここは「結合セルがある時だけ」返す
366+
if merged_ranges:
367+
sheet_data["merged_ranges"] = merged_ranges
368+
369+
# セル範囲のデータサイズ検証とデータ取得
370+
all_rows = []
371+
372+
if cell_range:
373+
# effective_rangeは既に計算済み
367374
# データサイズ検証(DoS対策)
368375
range_rows, range_cols = self._calculate_range_size(effective_range)
369376
if (
@@ -422,7 +429,7 @@ def _parse_sheet(
422429
def _build_merged_cell_cache(
423430
self,
424431
sheet,
425-
cell_range: str | None,
432+
effective_cell_range: str | None,
426433
) -> tuple[
427434
dict[str, str] | None,
428435
dict[str, Any] | None,
@@ -432,18 +439,24 @@ def _build_merged_cell_cache(
432439
マージセル情報をキャッシュして返す(パフォーマンス最適化)
433440
- 「今回返す予定の範囲」を先に確定し、その範囲と交差する結合だけを部分展開する
434441
- アンカー値は左上→無ければ結合範囲内の実在セルのみから最小(row,col)を選ぶ
442+
443+
Args:
444+
sheet: openpyxl Worksheet
445+
effective_cell_range: 正規化・拡張済みのセル範囲(例: "A1:D10")
446+
Noneの場合はsheet.dimensionsを使用
447+
448+
Returns:
449+
(merged_cell_map, merged_anchor_value_map, merged_ranges)のタプル
435450
"""
436451
merged_cell_map: dict[str, str] | None = None
437452
merged_anchor_value_map: dict[str, Any] | None = None
438453
merged_ranges: list[dict[str, Any]] = []
439454

440455
# 今回返す予定の範囲(結合情報の部分展開に使用)
441-
planned_range_for_merge: str | None = None
442-
if cell_range:
443-
planned_range_for_merge = self._normalize_column_range(cell_range, sheet)
444-
planned_range_for_merge = self._expand_axis_range(planned_range_for_merge)
445-
elif sheet.dimensions:
446-
planned_range_for_merge = str(sheet.dimensions)
456+
# effective_cell_rangeがあればそれを使用、なければsheet.dimensionsを使用
457+
planned_range_for_merge = effective_cell_range or (
458+
str(sheet.dimensions) if sheet.dimensions else None
459+
)
447460

448461
if not sheet.merged_cells.ranges or not planned_range_for_merge:
449462
return (None, None, [])

tests/test_sharepoint_excel.py

Lines changed: 172 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import datetime
22
import json
33
from io import BytesIO
4-
from unittest.mock import Mock
4+
from unittest.mock import Mock, patch
55
from zipfile import BadZipFile
66

77
import pytest
@@ -841,3 +841,174 @@ def test_normalize_column_range_already_normalized(self):
841841
# 空白のみ
842842
normalized = parser._normalize_column_range(" ", ws)
843843
assert normalized == " "
844+
845+
def test_no_duplicate_range_normalization(self):
846+
"""
847+
セル範囲の正規化・拡張が重複して実行されないことを確認
848+
849+
課題3-2の対応:_parse_sheetと_build_merged_cell_cacheで
850+
重複していた計算が1回のみになったことを検証
851+
"""
852+
# テスト用Excelを作成(結合セルあり)
853+
wb = Workbook()
854+
ws = wb.active
855+
ws.title = "TestSheet"
856+
ws["A1"] = "Header1"
857+
ws["B1"] = "Header2"
858+
ws["A2"] = "Data1"
859+
ws["B2"] = "Data2"
860+
ws["A3"] = "Data3"
861+
ws["B3"] = "Data4"
862+
863+
# A1:B1を結合
864+
ws.merge_cells("A1:B1")
865+
866+
excel_bytes = BytesIO()
867+
wb.save(excel_bytes)
868+
excel_bytes.seek(0)
869+
870+
# モックの設定
871+
self.mock_download_client.download_file.return_value = excel_bytes.getvalue()
872+
873+
parser = SharePointExcelParser(self.mock_download_client)
874+
875+
# _normalize_column_rangeと_expand_axis_rangeの呼び出し回数をカウント
876+
with patch.object(
877+
parser, "_normalize_column_range", wraps=parser._normalize_column_range
878+
) as mock_normalize, patch.object(
879+
parser, "_expand_axis_range", wraps=parser._expand_axis_range
880+
) as mock_expand:
881+
# 列範囲指定で解析
882+
result = parser.parse_to_json("/test/file.xlsx", cell_range="A:B")
883+
result_data = json.loads(result)
884+
885+
# 結果が正しいことを確認
886+
assert "sheets" in result_data
887+
assert len(result_data["sheets"]) == 1
888+
assert result_data["sheets"][0]["name"] == "TestSheet"
889+
890+
# _normalize_column_rangeは1回だけ呼ばれる(重複なし)
891+
assert (
892+
mock_normalize.call_count == 1
893+
), f"Expected 1 call, got {mock_normalize.call_count}"
894+
895+
# _expand_axis_rangeは1回だけ呼ばれる(重複なし)
896+
assert (
897+
mock_expand.call_count == 1
898+
), f"Expected 1 call, got {mock_expand.call_count}"
899+
900+
def test_build_merged_cell_cache_with_effective_range(self):
901+
"""
902+
_build_merged_cell_cacheにeffective_cell_rangeを渡した場合の動作確認
903+
904+
計算済みの範囲を渡すことで、内部での重複計算が回避されることを検証
905+
"""
906+
# テスト用Excelを作成(結合セルあり)
907+
wb = Workbook()
908+
ws = wb.active
909+
ws.title = "TestSheet"
910+
ws["A1"] = "Merged"
911+
ws["A2"] = "Data1"
912+
ws["B2"] = "Data2"
913+
914+
# A1:B1を結合
915+
ws.merge_cells("A1:B1")
916+
917+
parser = SharePointExcelParser(self.mock_download_client)
918+
919+
# effective_cell_rangeを渡して呼び出し
920+
merged_cell_map, merged_anchor_value_map, merged_ranges = (
921+
parser._build_merged_cell_cache(ws, effective_cell_range="A1:B2")
922+
)
923+
924+
# 結合セル情報が正しく取得されることを確認
925+
assert merged_cell_map is not None
926+
assert merged_ranges is not None
927+
assert len(merged_ranges) == 1
928+
assert merged_ranges[0]["range"] == "A1:B1"
929+
930+
def test_build_merged_cell_cache_without_effective_range(self):
931+
"""
932+
_build_merged_cell_cacheにNoneを渡した場合の動作確認
933+
934+
effective_cell_rangeがNoneの場合、sheet.dimensionsが使用されることを検証
935+
"""
936+
# テスト用Excelを作成(結合セルあり)
937+
wb = Workbook()
938+
ws = wb.active
939+
ws.title = "TestSheet"
940+
ws["A1"] = "Merged"
941+
ws["B1"] = "Header"
942+
ws["A2"] = "Data1"
943+
ws["B2"] = "Data2"
944+
945+
# A1:B1を結合
946+
ws.merge_cells("A1:B1")
947+
948+
parser = SharePointExcelParser(self.mock_download_client)
949+
950+
# effective_cell_rangeにNoneを渡して呼び出し
951+
# sheet.dimensionsが使用される
952+
merged_cell_map, merged_anchor_value_map, merged_ranges = (
953+
parser._build_merged_cell_cache(ws, effective_cell_range=None)
954+
)
955+
956+
# 結合セル情報が正しく取得されることを確認
957+
assert merged_cell_map is not None
958+
assert merged_ranges is not None
959+
assert len(merged_ranges) == 1
960+
assert merged_ranges[0]["range"] == "A1:B1"
961+
962+
def test_range_normalization_integration(self):
963+
"""
964+
セル範囲の正規化・拡張と結合セル処理の統合テスト
965+
966+
列範囲指定("A:B")が正しく正規化・拡張され、
967+
結合セル情報も正しく取得されることを検証
968+
"""
969+
# テスト用Excelを作成(結合セルあり)
970+
wb = Workbook()
971+
ws = wb.active
972+
ws.title = "TestSheet"
973+
ws["A1"] = "Merged Header"
974+
ws["A2"] = "Data1"
975+
ws["B2"] = "Data2"
976+
ws["A3"] = "Data3"
977+
ws["B3"] = "Data4"
978+
979+
# A1:B1を結合
980+
ws.merge_cells("A1:B1")
981+
982+
excel_bytes = BytesIO()
983+
wb.save(excel_bytes)
984+
excel_bytes.seek(0)
985+
986+
# モックの設定
987+
self.mock_download_client.download_file.return_value = excel_bytes.getvalue()
988+
989+
parser = SharePointExcelParser(self.mock_download_client)
990+
991+
# 列範囲指定で解析
992+
result = parser.parse_to_json("/test/file.xlsx", cell_range="A:B")
993+
result_data = json.loads(result)
994+
995+
# 結果検証
996+
assert "sheets" in result_data
997+
assert len(result_data["sheets"]) == 1
998+
999+
sheet_data = result_data["sheets"][0]
1000+
assert sheet_data["name"] == "TestSheet"
1001+
1002+
# requested_rangeとeffective_rangeが設定されている
1003+
assert sheet_data["requested_range"] == "A:B"
1004+
assert "effective_range" in sheet_data
1005+
assert sheet_data["effective_range"].startswith("A1:B")
1006+
1007+
# 結合セル情報が取得されている
1008+
assert "merged_ranges" in sheet_data
1009+
assert len(sheet_data["merged_ranges"]) == 1
1010+
assert sheet_data["merged_ranges"][0]["range"] == "A1:B1"
1011+
1012+
# データも正しく取得されている
1013+
assert "rows" in sheet_data
1014+
assert len(sheet_data["rows"]) > 0

0 commit comments

Comments
 (0)