Optimize hot path performance for type hint conversion

laughingman7743 · claude · laughingman7743 · commit 2ff410c331eb · 2026-02-28T19:22:02.000+09:00
- ResultSet: Pre-compute column_type_hints tuple once in
  _process_metadata instead of per-cell dict creation and .lower()
  lookup. Replace **({} if ... else {}) with simple if/else branching.
  Applied to AthenaResultSet, AthenaDictResultSet, and S3FS.

- Array JSON guard: Add JSON detection heuristic before json.loads in
  _convert_typed_array (only try JSON when a '"' appears within the first
  few characters after the opening '[', or the value starts with '[{' or
  '[null'), matching the existing pattern in map/struct to avoid
  JSONDecodeError exceptions on native format strings.

- TypeNode field lookup: Add cached _field_type_map dict for O(1)
  name-based field type resolution, replacing O(n) list.index() in
  _get_field_type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/pyathena/parser.py b/pyathena/parser.py
@@ -58,6 +58,23 @@ class TypeNode:
     type_name: str
     children: list[TypeNode] = field(default_factory=list)
     field_names: list[str] | None = None
+    _field_type_map: dict[str, TypeNode] | None = field(default=None, repr=False, compare=False)
+
+    def get_field_type(self, name: str) -> TypeNode | None:
+        """Return the child TypeNode paired with field ``name``, or None if unknown.
+
+        The name-to-child map is built on the first call and reused afterwards, so
+        later changes to ``field_names``/``children`` are not reflected. The cache is
+        excluded from equality so a lookup never makes equal nodes compare unequal.
+        """
+        if self._field_type_map is None and self.field_names:
+            # zip() drops names without a child; reversed() lets the first duplicate
+            # name win, matching the list.index() lookup this cache replaces.
+            pairs = list(zip(self.field_names, self.children, strict=False))
+            self._field_type_map = dict(reversed(pairs))
+        if self._field_type_map:
+            return self._field_type_map.get(name)
+        return None
 
 
 class TypeSignatureParser:
@@ -260,16 +277,20 @@ def _convert_typed_array(self, value: str, type_node: TypeNode) -> list[Any] | N
 
         element_type = type_node.children[0] if type_node.children else TypeNode("varchar")
 
-        # Try JSON first
-        try:
-            parsed = json.loads(value)
-            if isinstance(parsed, list):
-                return [
-                    None if elem is None else self.convert(self._to_json_str(elem), element_type)
-                    for elem in parsed
-                ]
-        except json.JSONDecodeError:
-            pass
+        # Try JSON first (only if content looks like JSON)
+        inner_preview = value[1:10] if len(value) > 10 else value[1:-1]
+        if '"' in inner_preview or value.startswith(("[{", "[null")):
+            try:
+                parsed = json.loads(value)
+                if isinstance(parsed, list):
+                    return [
+                        None
+                        if elem is None
+                        else self.convert(self._to_json_str(elem), element_type)
+                        for elem in parsed
+                    ]
+            except json.JSONDecodeError:
+                pass
 
         # Native format
         inner = value[1:-1].strip()
@@ -376,7 +397,6 @@ def _convert_typed_struct(self, value: str, type_node: TypeNode) -> dict[str, An
         if not (value.startswith("{") and value.endswith("}")):
             return None
 
-        field_names = type_node.field_names or []
         field_types = type_node.children or []
 
         # Try JSON first
@@ -387,7 +407,7 @@ def _convert_typed_struct(self, value: str, type_node: TypeNode) -> dict[str, An
                 if isinstance(parsed, dict):
                     result: dict[str, Any] = {}
                     for i, (k, v) in enumerate(parsed.items()):
-                        ft = self._get_field_type(k, field_names, field_types, i)
+                        ft = self._get_field_type(k, type_node, i)
                         result[k] = (
                             self.convert(self._to_json_str(v), ft) if v is not None else None
                         )
@@ -413,7 +433,7 @@ def _convert_typed_struct(self, value: str, type_node: TypeNode) -> dict[str, An
                 if any(char in k for char in '{}="'):
                     continue
 
-                ft = self._get_field_type(k, field_names, field_types, field_index)
+                ft = self._get_field_type(k, type_node, field_index)
                 field_index += 1
 
                 if v.startswith("{") and v.endswith("}"):
@@ -428,6 +448,7 @@ def _convert_typed_struct(self, value: str, type_node: TypeNode) -> dict[str, An
             return result if result else None
 
         # Unnamed struct
+        field_names = type_node.field_names or []
         values = [v.strip() for v in inner.split(",")]
         result = {}
         for i, v in enumerate(values):
@@ -436,30 +457,29 @@ def _convert_typed_struct(self, value: str, type_node: TypeNode) -> dict[str, An
             result[name] = self._convert_element(v, ft)
         return result
 
+    @staticmethod
     def _get_field_type(
-        self,
         field_name: str,
-        field_names: list[str],
-        field_types: list[TypeNode],
+        type_node: TypeNode,
         field_index: int,
     ) -> TypeNode:
         """Look up the type for a struct field by name or index.
 
-        Tries name-based lookup first, then falls back to positional index.
+        Uses the TypeNode's cached dict for O(1) name lookup, then falls
+        back to positional index.
 
         Args:
             field_name: Name of the field to look up.
-            field_names: List of known field names from the type hint.
-            field_types: List of corresponding field types.
+            type_node: The parent row/struct TypeNode.
             field_index: Current positional index as fallback.
 
         Returns:
             TypeNode for the field, defaulting to varchar if not found.
         """
-        if field_name in field_names:
-            idx = field_names.index(field_name)
-            if idx < len(field_types):
-                return field_types[idx]
+        ft = type_node.get_field_type(field_name)  # name-based lookup comes first
+        if ft is not None:
+            return ft
+        field_types = type_node.children or []  # otherwise fall back to the positional child
         if field_index < len(field_types):
             return field_types[field_index]
         return TypeNode("varchar")
diff --git a/pyathena/result_set.py b/pyathena/result_set.py
@@ -83,6 +83,7 @@ def __init__(
         )
 
         self._metadata: tuple[dict[str, Any], ...] | None = None
+        self._column_type_hints: tuple[str | None, ...] | None = None
         self._rows: collections.deque[tuple[Any | None, ...] | dict[Any, Any | None]] = (
             collections.deque()
         )
@@ -424,6 +425,10 @@ def _process_metadata(self, response: dict[str, Any]) -> None:
         if column_info is None:
             raise DataError("KeyError `ColumnInfo`")
         self._metadata = tuple(column_info)
+        if self._result_set_type_hints:
+            self._column_type_hints = tuple(
+                self._result_set_type_hints.get(m.get("Name", "").lower()) for m in self._metadata
+            )
 
     def _process_update_count(self, response: dict[str, Any]) -> None:
         update_count = response.get("UpdateCount")
@@ -449,21 +454,23 @@ def _get_rows(
         converter: Converter | None = None,
     ) -> list[tuple[Any | None, ...] | dict[Any, Any | None]]:
         conv = converter or self._converter
-        hints = self._result_set_type_hints
+        col_hints = self._column_type_hints
+        if col_hints:
+            return [
+                tuple(
+                    conv.convert(meta.get("Type"), row.get("VarCharValue"), type_hint=hint)
+                    if hint
+                    else conv.convert(meta.get("Type"), row.get("VarCharValue"))
+                    for meta, row, hint in zip(
+                        metadata, rows[i].get("Data", []), col_hints, strict=False
+                    )
+                )
+                for i in range(offset, len(rows))
+            ]
         return [
             tuple(
-                [
-                    conv.convert(
-                        meta.get("Type"),
-                        row.get("VarCharValue"),
-                        **(
-                            {"type_hint": hints[meta.get("Name", "").lower()]}
-                            if hints and meta.get("Name", "").lower() in hints
-                            else {}
-                        ),
-                    )
-                    for meta, row in zip(metadata, rows[i].get("Data", []), strict=False)
-                ]
+                conv.convert(meta.get("Type"), row.get("VarCharValue"))
+                for meta, row in zip(metadata, rows[i].get("Data", []), strict=False)
             )
             for i in range(offset, len(rows))
         ]
@@ -645,24 +652,29 @@ def _get_rows(
         converter: Converter | None = None,
     ) -> list[tuple[Any | None, ...] | dict[Any, Any | None]]:
         conv = converter or self._converter
-        hints = self._result_set_type_hints
-        return [
-            self.dict_type(
-                [
+        col_hints = self._column_type_hints
+        if col_hints:
+            return [
+                self.dict_type(
                     (
                         meta.get("Name"),
-                        conv.convert(
-                            meta.get("Type"),
-                            row.get("VarCharValue"),
-                            **(
-                                {"type_hint": hints[meta.get("Name", "").lower()]}
-                                if hints and meta.get("Name", "").lower() in hints
-                                else {}
-                            ),
-                        ),
+                        conv.convert(meta.get("Type"), row.get("VarCharValue"), type_hint=hint)
+                        if hint
+                        else conv.convert(meta.get("Type"), row.get("VarCharValue")),
                     )
-                    for meta, row in zip(metadata, rows[i].get("Data", []), strict=False)
-                ]
+                    for meta, row, hint in zip(
+                        metadata, rows[i].get("Data", []), col_hints, strict=False
+                    )
+                )
+                for i in range(offset, len(rows))
+            ]
+        return [
+            self.dict_type(
+                (
+                    meta.get("Name"),
+                    conv.convert(meta.get("Type"), row.get("VarCharValue")),
+                )
+                for meta, row in zip(metadata, rows[i].get("Data", []), strict=False)
             )
             for i in range(offset, len(rows))
         ]
diff --git a/pyathena/s3fs/result_set.py b/pyathena/s3fs/result_set.py
@@ -151,8 +151,7 @@ def _fetch(self) -> None:
 
         description = self.description if self.description else []
         column_types = [d[1] for d in description]
-        column_names = [d[0] for d in description]
-        hints = self._result_set_type_hints
+        col_hints = self._column_type_hints
 
         rows_fetched = 0
         while rows_fetched < self._arraysize:
@@ -165,35 +164,33 @@ def _fetch(self) -> None:
             # AthenaCSVReader returns None for NULL values directly,
             # DefaultCSVReader returns empty string which needs conversion
             if self._csv_reader_class is DefaultCSVReader:
-                converted_row = tuple(
-                    self._converter.convert(
-                        col_type,
-                        value if value != "" else None,
-                        **(
-                            {"type_hint": hints[col_name.lower()]}
-                            if hints and col_name.lower() in hints
-                            else {}
-                        ),
+                if col_hints:
+                    converted_row = tuple(
+                        self._converter.convert(
+                            col_type, value if value != "" else None, type_hint=hint
+                        )
+                        if hint
+                        else self._converter.convert(col_type, value if value != "" else None)
+                        for col_type, value, hint in zip(column_types, row, col_hints, strict=False)
                     )
-                    for col_type, col_name, value in zip(
-                        column_types, column_names, row, strict=False
+                else:
+                    converted_row = tuple(
+                        self._converter.convert(col_type, value if value != "" else None)
+                        for col_type, value in zip(column_types, row, strict=False)
                     )
-                )
             else:
-                converted_row = tuple(
-                    self._converter.convert(
-                        col_type,
-                        value,
-                        **(
-                            {"type_hint": hints[col_name.lower()]}
-                            if hints and col_name.lower() in hints
-                            else {}
-                        ),
+                if col_hints:
+                    converted_row = tuple(
+                        self._converter.convert(col_type, value, type_hint=hint)
+                        if hint
+                        else self._converter.convert(col_type, value)
+                        for col_type, value, hint in zip(column_types, row, col_hints, strict=False)
                     )
-                    for col_type, col_name, value in zip(
-                        column_types, column_names, row, strict=False
+                else:
+                    converted_row = tuple(
+                        self._converter.convert(col_type, value)
+                        for col_type, value in zip(column_types, row, strict=False)
                     )
-                )
             self._rows.append(converted_row)
             rows_fetched += 1