Skip to content

Commit f08022e

Browse files
k-ibaraki and claude committed
feat(excel): Add security measures and code improvements
Security enhancements (DoS prevention):
- Add frozen_rows validation (max: 100 rows, configurable)
- Add data size validation (max: 10,000 rows × 10,000 cols, configurable)
- User-friendly error messages suggesting cell_range usage
- Environment variables for customization:
  - SHAREPOINT_EXCEL_MAX_FROZEN_ROWS (default: 100)
  - SHAREPOINT_EXCEL_MAX_DATA_ROWS (default: 10000)
  - SHAREPOINT_EXCEL_MAX_DATA_COLS (default: 10000)

Performance optimization:
- metadata_only mode now skips data row parsing (not just response)
- Significantly reduces memory usage for large files
- Only processes header rows when metadata_only=True

Code quality improvements:
- Add _parse_rows helper method to reduce code duplication
- Add _calculate_range_size helper for size validation
- Cleaner and more maintainable code structure

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent afce94d commit f08022e

2 files changed

Lines changed: 123 additions & 29 deletions

File tree

src/config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,17 @@ def __init__(self):
5555
os.getenv("SHAREPOINT_ALLOWED_FILE_EXTENSIONS", "pdf,docx,xlsx,pptx,txt")
5656
)
5757

58+
# Excel処理の制限設定
59+
self.excel_max_frozen_rows = int(
60+
os.getenv("SHAREPOINT_EXCEL_MAX_FROZEN_ROWS", "100")
61+
)
62+
self.excel_max_data_rows = int(
63+
os.getenv("SHAREPOINT_EXCEL_MAX_DATA_ROWS", "10000")
64+
)
65+
self.excel_max_data_cols = int(
66+
os.getenv("SHAREPOINT_EXCEL_MAX_DATA_COLS", "10000")
67+
)
68+
5869
# ツール説明文のカスタマイズ
5970
self.search_tool_description = os.getenv(
6071
"SHAREPOINT_SEARCH_TOOL_DESCRIPTION",

src/sharepoint_excel.py

Lines changed: 112 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from openpyxl.utils import column_index_from_string, get_column_letter
1414
from openpyxl.utils.cell import coordinate_from_string
1515

16+
from src.config import config
17+
1618
logger = logging.getLogger(__name__)
1719

1820

@@ -187,11 +189,19 @@ def _parse_sheet(
187189
"dimensions": str(sheet.dimensions) if sheet.dimensions else None,
188190
}
189191

190-
# freeze_panes情報の取得
192+
# freeze_panes情報の取得と検証
191193
frozen_rows = 0
192194
frozen_cols = 0
193195
if include_header:
194196
frozen_rows, frozen_cols = self._parse_freeze_panes(sheet.freeze_panes)
197+
198+
# frozen_rows検証(DoS対策)
199+
if frozen_rows > config.excel_max_frozen_rows:
200+
raise ValueError(
201+
f"ヘッダー行数({frozen_rows})が上限({config.excel_max_frozen_rows})を超えています。"
202+
f"悪意のあるファイルまたは処理には大きすぎるファイルです。"
203+
)
204+
195205
if sheet.freeze_panes:
196206
sheet_data["freeze_panes"] = sheet.freeze_panes
197207
sheet_data["frozen_rows"] = frozen_rows
@@ -208,11 +218,21 @@ def _parse_sheet(
208218
coord_str = f"{col_letter}{cell_coord[0]}"
209219
merged_cell_map[coord_str] = str(merged_range)
210220

211-
# セル範囲の拡張(include_headerがTrueで固定行がある場合)
221+
# セル範囲の拡張とデータサイズ検証
212222
all_rows = []
213223
if cell_range:
214224
sheet_data["requested_range"] = cell_range
215225

226+
# データサイズ検証(DoS対策)
227+
range_rows, range_cols = self._calculate_range_size(cell_range)
228+
if range_rows > config.excel_max_data_rows or range_cols > config.excel_max_data_cols:
229+
raise ValueError(
230+
f"データサイズ({range_rows}行 × {range_cols}列)が上限"
231+
f"({config.excel_max_data_rows}行 × {config.excel_max_data_cols}列)を超えています。"
232+
f"cell_rangeパラメータで必要な範囲を指定してください。"
233+
f"例: cell_range='A1:Z1000'"
234+
)
235+
216236
# セル範囲を拡張してヘッダーを含める
217237
if include_header and frozen_rows > 0:
218238
header_range, data_range = self._expand_range_with_headers(
@@ -222,43 +242,49 @@ def _parse_sheet(
222242
# ヘッダー範囲がある場合は取得
223243
if header_range:
224244
header_data = sheet[header_range]
225-
header_rows = self._normalize_range_data(header_data)
226-
for row in header_rows:
227-
row_data = [
228-
self._parse_cell(cell, include_formatting, merged_cell_map)
229-
for cell in row
230-
]
231-
all_rows.append(row_data)
232-
233-
# データ範囲を取得
234-
range_data = sheet[data_range]
235-
data_rows = self._normalize_range_data(range_data)
236-
for row in data_rows:
245+
header_rows_data = self._normalize_range_data(header_data)
246+
all_rows.extend(self._parse_rows(header_rows_data, include_formatting, merged_cell_map))
247+
248+
# データ範囲を取得(metadata_onlyの場合はスキップ)
249+
if not metadata_only:
250+
range_data = sheet[data_range]
251+
data_rows_data = self._normalize_range_data(range_data)
252+
all_rows.extend(self._parse_rows(data_rows_data, include_formatting, merged_cell_map))
253+
else:
254+
# Plain cell-range read: this branch has no header rows, so metadata_only skips the read entirely
255+
if not metadata_only:
256+
range_data = sheet[cell_range]
257+
rows_to_process = self._normalize_range_data(range_data)
258+
all_rows.extend(self._parse_rows(rows_to_process, include_formatting, merged_cell_map))
259+
elif sheet.dimensions:
260+
# シート全体を取得
261+
# データサイズ検証(DoS対策)
262+
sheet_rows, sheet_cols = self._calculate_range_size(sheet.dimensions)
263+
if sheet_rows > config.excel_max_data_rows or sheet_cols > config.excel_max_data_cols:
264+
raise ValueError(
265+
f"シート全体のサイズ({sheet_rows}行 × {sheet_cols}列)が上限"
266+
f"({config.excel_max_data_rows}行 × {config.excel_max_data_cols}列)を超えています。"
267+
f"cell_rangeパラメータで必要な範囲を指定してください。"
268+
f"例: cell_range='A1:Z1000'"
269+
)
270+
271+
# metadata_onlyの場合はヘッダーのみ取得
272+
if metadata_only and include_header and frozen_rows > 0:
273+
# ヘッダー行のみ取得
274+
for row in sheet.iter_rows(max_row=frozen_rows):
237275
row_data = [
238276
self._parse_cell(cell, include_formatting, merged_cell_map)
239277
for cell in row
240278
]
241279
all_rows.append(row_data)
242-
else:
243-
# 通常のセル範囲取得
244-
range_data = sheet[cell_range]
245-
rows_to_process = self._normalize_range_data(range_data)
246-
for row in rows_to_process:
280+
elif not metadata_only:
281+
# 全データを取得
282+
for row in sheet.iter_rows():
247283
row_data = [
248284
self._parse_cell(cell, include_formatting, merged_cell_map)
249285
for cell in row
250286
]
251287
all_rows.append(row_data)
252-
elif sheet.dimensions:
253-
# シート全体を取得
254-
for row in sheet.iter_rows():
255-
row_data = []
256-
for cell in row:
257-
cell_data = self._parse_cell(
258-
cell, include_formatting, merged_cell_map
259-
)
260-
row_data.append(cell_data)
261-
all_rows.append(row_data)
262288

263289
# レスポンス形式の分岐
264290
if include_header:
@@ -332,6 +358,32 @@ def _parse_cell(
332358

333359
return cell_data
334360

361+
def _parse_rows(
    self,
    rows: tuple[tuple[Cell, ...], ...],
    include_formatting: bool,
    merged_cell_map: dict[str, str] | None = None,
) -> list[list[dict[str, Any]]]:
    """
    Convert a grid of worksheet cells into nested lists of parsed cells.

    Shared helper so that every range/sheet branch parses rows the same way.

    Args:
        rows: Rows to convert; each row is an iterable of cells.
        include_formatting: Passed through unchanged to ``_parse_cell``.
        merged_cell_map: Passed through unchanged to ``_parse_cell``.

    Returns:
        One list per input row, containing the ``_parse_cell`` result for
        each cell of that row, in the original order.
    """
    return [
        [
            self._parse_cell(each_cell, include_formatting, merged_cell_map)
            for each_cell in cells_in_row
        ]
        for cells_in_row in rows
    ]
386+
335387
def _serialize_value(self, value: Any) -> Any:
336388
"""
337389
セル値をJSONシリアライズ可能な形式に変換
@@ -377,6 +429,37 @@ def _color_to_hex(self, color: Color | None) -> str | None:
377429

378430
return None
379431

432+
def _calculate_range_size(self, range_str: str) -> tuple[int, int]:
    """
    Compute how many rows and columns a cell-range string spans.

    Args:
        range_str: Cell range such as "A1:D10" or "A1:XFD1048576". A string
            without ":" is treated as a single cell.

    Returns:
        A ``(rows, cols)`` tuple. ``(1, 1)`` for a single cell. ``(0, 0)``
        when the range cannot be parsed (a warning is logged in that case).
    """
    try:
        if ":" not in range_str:
            # Single cell
            return (1, 1)

        start_cell, end_cell = range_str.split(":")

        start_col, start_row = coordinate_from_string(start_cell)
        end_col, end_row = coordinate_from_string(end_cell)

        start_col_idx = column_index_from_string(start_col)
        end_col_idx = column_index_from_string(end_col)

        # Use the absolute span so that a reversed range (e.g. "D10:A1")
        # cannot produce zero/negative sizes and slip under size limits
        # that callers compare this result against.
        rows = abs(end_row - start_row) + 1
        cols = abs(end_col_idx - start_col_idx) + 1

        return (rows, cols)
    except Exception as e:
        # Deliberate best-effort: an unparseable range is reported as empty.
        # NOTE(review): (0, 0) passes any "size > limit" check, so callers
        # using this for DoS protection fail open on unparseable input
        # (e.g. "A:D", "1:10", sheet-qualified ranges) — confirm that the
        # subsequent range lookup rejects or bounds those cases.
        logger.warning("Failed to calculate range size '%s': %s", range_str, e)
        return (0, 0)
462+
380463
def _parse_freeze_panes(self, freeze_panes: str | None) -> tuple[int, int]:
381464
"""
382465
freeze_panes文字列を解析して固定行数・列数を返す

0 commit comments

Comments
 (0)