Merge pull request #61 from ncdcdev/feat/issue-55-include-row-data

k-ibaraki · web-flow · commit e493d2416817 · 2026-02-11T15:50:50.000+09:00
feat: add include_row_data parameter for Excel search (issue #55)
diff --git a/docs/usage.md b/docs/usage.md
@@ -208,6 +208,7 @@ The `sharepoint_excel` tool allows you to read and search Excel files in SharePo
 | `query` | str \| None | None | Search keyword (enables search mode) |
 | `sheet` | str \| None | None | Sheet name (get specific sheet only) |
 | `cell_range` | str \| None | None | Cell range (e.g., "A1:D10") |
+| `include_row_data` | bool | False | Include entire row data for each search match (search mode only) |
 
 ### Basic Workflow
 
@@ -248,6 +249,53 @@ result = sharepoint_excel(
 }
 ```
 
+**Search with Row Data (`include_row_data=True`):**
+
+Use `include_row_data=True` to get the entire row for each match in a single call, avoiding the N+1 pattern of one search followed by a separate row read per match.
+
+```python
+result = sharepoint_excel(
+    file_path="/sites/finance/Shared Documents/report.xlsx",
+    query="budget",
+    include_row_data=True
+)
+```
+
+```json
+{
+  "matches": [
+    {
+      "sheet": "Sheet1",
+      "coordinate": "B5",
+      "value": "Monthly Budget",
+      "row_data": [
+        {"coordinate": "A5", "value": "Category"},
+        {"coordinate": "B5", "value": "Monthly Budget"},
+        {"coordinate": "C5", "value": 50000}
+      ]
+    }
+  ]
+}
+```
+
+**Performance Guidelines:**
+- **Small scale** (<50 matches): Highly effective, recommended
+- **Medium scale** (50-200 matches): Effective, monitor response size
+- **Large scale** (>200 matches): Consider response size impact
+
+**Important Notes:**
+- `row_data` includes only non-null cells from the matched row
+- `row_data` does NOT include header rows, even when the sheet has frozen rows (`frozen_rows` > 0)
+- To understand column meanings, first read `A1:Z5` for header context
+- **Multiple matches in the same row**: Each match gets its own copy of `row_data` (the row is duplicated)
+  - Example: If "budget" matches both A5 and B5, both matches will include the same row_data
+  - This ensures each match is self-contained but may increase response size
+
+**Verified Use Case:**
+- 23 matches processed in 1 call (vs. 24 calls without `include_row_data`: 1 search plus 23 row reads)
+- Token savings: ~2,300 tokens
+- Response time: Significantly reduced
+
 #### 2. Read All Data (Default)
 ```python
 # Get all sheets and all data
diff --git a/docs/usage_ja.md b/docs/usage_ja.md
@@ -208,6 +208,7 @@ results = sharepoint_docs_search(
 | `query` | str \| None | None | 検索キーワード（検索モードを有効化） |
 | `sheet` | str \| None | None | シート名（特定シートのみ取得） |
 | `cell_range` | str \| None | None | セル範囲（例: "A1:D10"） |
+| `include_row_data` | bool | False | 検索マッチごとに行全体のデータを含める（検索モード専用） |
 
 ### 基本的なワークフロー
 
@@ -248,6 +249,53 @@ result = sharepoint_excel(
 }
 ```
 
+**行データ付き検索（`include_row_data=True`）:**
+
+`include_row_data=True`を使用すると、各マッチの行全体のデータを1回の呼び出しで取得できます（N+1回の読み取りを回避）。
+
+```python
+result = sharepoint_excel(
+    file_path="/sites/finance/Shared Documents/report.xlsx",
+    query="予算",
+    include_row_data=True
+)
+```
+
+```json
+{
+  "matches": [
+    {
+      "sheet": "Sheet1",
+      "coordinate": "B5",
+      "value": "月間予算",
+      "row_data": [
+        {"coordinate": "A5", "value": "カテゴリ"},
+        {"coordinate": "B5", "value": "月間予算"},
+        {"coordinate": "C5", "value": 50000}
+      ]
+    }
+  ]
+}
+```
+
+**パフォーマンス目安:**
+- **小規模** (<50件): 効果大、推奨
+- **中規模** (50-200件): 効果あり、レスポンスサイズに注意
+- **大規模** (>200件): レスポンスサイズへの影響を考慮
+
+**重要な注意事項:**
+- `row_data` にはマッチした行の非nullセルのみが含まれます
+- `row_data` にはヘッダー行は含まれません（frozen_rows設定時も同様）
+- 列の意味を理解するには、先に `A1:Z5` を読み取ってヘッダーコンテキストを確認してください
+- **同一行に複数マッチがある場合**: 各マッチに独立した `row_data` が含まれます（重複）
+  - 例: "予算" が A5 と B5 の両方にマッチした場合、両方のマッチに同じ row_data が含まれます
+  - 各マッチが自己完結していますが、レスポンスサイズが増加する可能性があります
+
+**実証済みユースケース:**
+- 23件のマッチを1回の呼び出しで処理（`include_row_data` なしでは検索1回＋行読み取り23回の計24回必要）
+- トークン削減: 約2,300トークン
+- レスポンス時間: 大幅短縮
+
 #### 2. 全データ取得（デフォルト）
 ```python
 # 全シート・全データを取得
diff --git a/src/server.py b/src/server.py
@@ -456,6 +456,7 @@ def sharepoint_excel(
     include_frozen_rows: bool = True,
     include_cell_styles: bool = False,
     expand_axis_range: bool = False,
+    include_row_data: bool = False,
     ctx: Context | None = None,
 ) -> str:
     """
@@ -478,6 +479,9 @@ def sharepoint_excel(
         expand_axis_range: 単一列/行の部分範囲を開始側に自動拡張（default: false）
             True: 例 "J50:J100" → "J1:J100"（行1に拡張）
             frozen_rows=0でヘッダー文脈が不明な場合に使用
+        include_row_data: 検索モード時、マッチしたセルの行全体のデータを含める（default: false）
+            True: 各マッチに row_data（同一行の非nullセル一覧）を追加
+            読み取りモードでは無視される
         ctx: FastMCP context (injected automatically)
 
     Returns:
@@ -497,7 +501,9 @@ def sharepoint_excel(
 
         # 検索モード
         if query:
-            return parser.search_cells(file_path, query, sheet_name=sheet)
+            return parser.search_cells(
+                file_path, query, sheet_name=sheet, include_row_data=include_row_data
+            )
 
         # 読み取りモード
         return parser.parse_to_json(
@@ -544,7 +550,7 @@ def register_tools():
         mcp.tool(
             description=(
                 "Read or search Excel files in SharePoint. "
-                "Search mode: use 'query' parameter to find cells containing specific text (returns cell locations). "
+                "Search mode: use 'query' parameter to find cells containing specific text (returns cell locations and optionally row data). "
                 "Read mode: use 'sheet' and 'cell_range' parameters to retrieve data from specific sections. "
                 "When cell_range is specified with include_frozen_rows=True (default), frozen rows are automatically "
                 "included even if they are outside the specified range. frozen_rows indicates the number of header rows "
@@ -555,10 +561,13 @@ def register_tools():
                 "Header detection: For sheets with frozen_rows > 0, headers are automatically included with include_frozen_rows=True (default). "
                 "For sheets with frozen_rows=0, headers are not automatically included and context may be unclear. "
                 "ALWAYS read exactly 5 rows for header check: 'A1:Z5' (NOT 'A1:Z50' or more). "
+                "IMPORTANT: include_row_data=True returns matched row data only (not headers), same-row matches duplicate data. "
+                "Always read 'A1:Z5' first for header context. Effective for <200 matches. "
                 "Prefer 'query' search when possible to locate data first. "
-                "Workflow: 1) Search OR read 'A1:Z5' for header check, "
-                "2) Read specific range (include_frozen_rows adds frozen headers automatically), "
-                "3) If frozen_rows=0 and header context is unclear, retry with expand_axis_range=True "
+                "Workflow: 1) Read 'A1:Z5' for header check (REQUIRED for understanding column structure), "
+                "2) Search with query (optionally with include_row_data=True to get matched row data), "
+                "3) Read specific range if needed (include_frozen_rows adds frozen headers automatically), "
+                "4) If frozen_rows=0 and header context is unclear, retry with expand_axis_range=True "
                 "to auto-include row 1 (for columns) or column A (for rows)."
             )
         )(sharepoint_excel)
diff --git a/src/sharepoint_excel.py b/src/sharepoint_excel.py
@@ -34,6 +34,7 @@ def search_cells(
         file_path: str,
         query: str,
         sheet_name: str | None = None,
+        include_row_data: bool = False,
     ) -> str:
         """
         セル内容を検索して該当位置を返す
@@ -67,25 +68,35 @@ def search_cells(
             # sheet_name 指定がある場合はそのシートを優先して検索
             if sheet_name:
                 if sheet_name in workbook.sheetnames:
-                    self._scan_sheet(workbook[sheet_name], sheet_name, query, matches)
+                    self._scan_sheet(
+                        workbook[sheet_name],
+                        sheet_name,
+                        query,
+                        matches,
+                        include_row_data,
+                    )
 
                     # マッチが無ければ全シート走査にフォールバック
                     if len(matches) == 0:
                         for sn in workbook.sheetnames:
                             if sn == sheet_name:
                                 continue
-                            self._scan_sheet(workbook[sn], sn, query, matches)
+                            self._scan_sheet(
+                                workbook[sn], sn, query, matches, include_row_data
+                            )
                 else:
                     # sheet_name が存在しない場合は「指定なし」と同じ扱いで全シート検索
                     warnings.append(
                         f"Sheet '{sheet_name}' not found. Searching all sheets instead."
                     )
                     for sn in workbook.sheetnames:
-                        self._scan_sheet(workbook[sn], sn, query, matches)
+                        self._scan_sheet(
+                            workbook[sn], sn, query, matches, include_row_data
+                        )
             else:
                 # 全シート検索
                 for sn in workbook.sheetnames:
-                    self._scan_sheet(workbook[sn], sn, query, matches)
+                    self._scan_sheet(workbook[sn], sn, query, matches, include_row_data)
 
             logger.info(f"Found {len(matches)} matches for query '{query}'")
 
@@ -270,6 +281,7 @@ def _scan_sheet(
         sheet_name_for_result: str,
         query: str,
         matches: list[dict[str, Any]],
+        include_row_data: bool = False,
     ) -> None:
         """
         シート内のセルを走査してqueryに一致するセルをmatchesに追加する
@@ -281,31 +293,72 @@ def _scan_sheet(
             # その場合はiter_rows()を使用するフォールバックロジックが動作します。
             if hasattr(sheet, "_cells"):
                 # 実在セルのみを走査（高速）
+                # まずマッチを収集（_cellsのイテレーション中にsheetアクセスすると辞書が変わるため）
+                new_matches: list[dict[str, Any]] = []
                 for cell in sheet._cells.values():
                     if cell.value is not None:
                         cell_value_str = str(cell.value)
                         if query in cell_value_str:
-                            matches.append(
+                            new_matches.append(
                                 {
                                     "sheet": sheet_name_for_result,
                                     "coordinate": cell.coordinate,
                                     "value": self._serialize_value(cell.value),
+                                    "_row": cell.row,
                                 }
                             )
+                # イテレーション完了後に行データを取得
+                for match in new_matches:
+                    row_num = match.pop("_row")
+                    if include_row_data:
+                        match["row_data"] = self._get_row_data(sheet, row_num)
+                    matches.append(match)
             else:
                 # openpyxl公開APIを使用（互換性確保）
                 for row in sheet.iter_rows(values_only=False):
                     for cell in row:
                         if cell.value is not None:
                             cell_value_str = str(cell.value)
                             if query in cell_value_str:
-                                matches.append(
-                                    {
-                                        "sheet": sheet_name_for_result,
-                                        "coordinate": cell.coordinate,
-                                        "value": self._serialize_value(cell.value),
-                                    }
-                                )
+                                match = {
+                                    "sheet": sheet_name_for_result,
+                                    "coordinate": cell.coordinate,
+                                    "value": self._serialize_value(cell.value),
+                                }
+                                if include_row_data:
+                                    match["row_data"] = [
+                                        {
+                                            "coordinate": c.coordinate,
+                                            "value": self._serialize_value(c.value),
+                                        }
+                                        for c in row
+                                        if c.value is not None
+                                    ]
+                                matches.append(match)
+
+    def _get_row_data(self, sheet, row_num: int) -> list[dict[str, Any]]:
+        """
+        指定行の非nullセルデータをリストとして返す
+
+        Args:
+            sheet: openpyxl Worksheet
+            row_num: 行番号
+
+        Returns:
+            非nullセルの [{coordinate, value}, ...] リスト
+        """
+        row_cells = sheet[row_num]
+        # 単一列シートではCellオブジェクト単体が返される場合がある
+        if isinstance(row_cells, Cell):
+            row_cells = (row_cells,)
+        return [
+            {
+                "coordinate": c.coordinate,
+                "value": self._serialize_value(c.value),
+            }
+            for c in row_cells
+            if c.value is not None
+        ]
 
     def _calculate_header_range(self, cell_range: str, frozen_rows: int) -> str | None:
         """
diff --git a/tests/test_server.py b/tests/test_server.py
@@ -244,7 +244,7 @@ def test_excel_search_mode(
 
                 # 検索メソッドが呼ばれることを確認
                 mock_excel_parser.search_cells.assert_called_once_with(
-                    "/sites/test/Shared Documents/test.xlsx", "売上", sheet_name=None
+                    "/sites/test/Shared Documents/test.xlsx", "売上", sheet_name=None, include_row_data=False
                 )
                 # parse_to_jsonは呼ばれない
                 mock_excel_parser.parse_to_json.assert_not_called()
@@ -295,6 +295,26 @@ def test_excel_with_cell_range_parameter(
                     expand_axis_range=False,
                 )
 
+    @pytest.mark.unit
+    def test_excel_search_with_include_row_data(
+        self, mock_config, mock_sharepoint_client, mock_excel_parser
+    ):
+        """Excel検索モードでinclude_row_data=Trueが渡されるテスト"""
+        with patch(
+            "src.server._get_sharepoint_client", return_value=mock_sharepoint_client
+        ):
+            with patch("src.server.config", mock_config):
+                sharepoint_excel(
+                    file_path="/sites/test/Shared Documents/test.xlsx",
+                    query="売上",
+                    include_row_data=True,
+                )
+
+                mock_excel_parser.search_cells.assert_called_once_with(
+                    "/sites/test/Shared Documents/test.xlsx", "売上", sheet_name=None, include_row_data=True
+                )
+                mock_excel_parser.parse_to_json.assert_not_called()
+
     @pytest.mark.unit
     def test_excel_with_real_json(
         self, mock_config, mock_sharepoint_client, mock_excel_parser
diff --git a/tests/test_sharepoint_excel.py b/tests/test_sharepoint_excel.py