diff --git a/pyrit/score/__init__.py b/pyrit/score/__init__.py
index 9924ea7904..011d1ba6ae 100644
--- a/pyrit/score/__init__.py
+++ b/pyrit/score/__init__.py
@@ -25,6 +25,7 @@
 from pyrit.score.float_scale.self_ask_general_float_scale_scorer import SelfAskGeneralFloatScaleScorer
 from pyrit.score.float_scale.self_ask_likert_scorer import LikertScaleEvalFiles, LikertScalePaths, SelfAskLikertScorer
 from pyrit.score.float_scale.self_ask_scale_scorer import SelfAskScaleScorer
+from pyrit.score.response_handler import JsonSchemaResponseHandler, ResponseHandler
 from pyrit.score.scorer import Scorer
 from pyrit.score.scorer_evaluation.metrics_type import MetricsType, RegistryUpdateBehavior
 from pyrit.score.scorer_evaluation.scorer_metrics import (
@@ -143,6 +144,7 @@ def __getattr__(name: str) -> object:
     "HumanLabeledDataset",
     "HumanLabeledEntry",
     "InsecureCodeScorer",
+    "JsonSchemaResponseHandler",
     "LikertScaleEvalFiles",
     "LikertScalePaths",
     "MarkdownInjectionScorer",
@@ -159,6 +161,7 @@ def __getattr__(name: str) -> object:
     "QuestionAnswerScorer",
     "RegexScorer",
     "RegistryUpdateBehavior",
+    "ResponseHandler",
     "Scorer",
     "ScorerEvalDatasetFiles",
     "ScorerEvaluator",
diff --git a/pyrit/score/float_scale/float_scale_scorer.py b/pyrit/score/float_scale/float_scale_scorer.py
index 7dbe8cf29f..e5600fbd1d 100644
--- a/pyrit/score/float_scale/float_scale_scorer.py
+++ b/pyrit/score/float_scale/float_scale_scorer.py
@@ -3,21 +3,15 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, ClassVar
 
-from pyrit.exceptions.exception_classes import InvalidJsonException
 from pyrit.models import (
-    JsonSchemaDefinition,
     Message,
-    PromptDataType,
     Score,
-    UnvalidatedScore,
 )
 from pyrit.score.scorer import Scorer
 
 if TYPE_CHECKING:
-    from uuid import UUID
-
     from pyrit.prompt_target.common.prompt_target import PromptTarget
     from pyrit.score.scorer_evaluation.scorer_metrics import HarmScorerMetrics
     from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
@@ -44,6 +38,10 @@ class FloatScaleScorer(Scorer):
     "blocked = True") should override ``_score_piece_async`` or ``_build_fallback_score``.
     """
 
+    # Marks scores produced by this scorer as numeric so the shared LLM round-trip validates that
+    # the returned score value is parsable as a float. Float-scale scorers require this.
+    _score_value_is_numeric: ClassVar[bool] = True
+
     def __init__(self, *, validator: ScorerPromptValidator, chat_target: PromptTarget | None = None) -> None:
         """
         Initialize the FloatScaleScorer.
@@ -138,50 +136,3 @@ def get_scorer_metrics(self) -> HarmScorerMetrics | None:
             eval_hash=eval_hash,
             harm_category=self.evaluation_file_mapping.harm_category,
         )
-
-    async def _score_value_with_llm_async(
-        self,
-        *,
-        prompt_target: PromptTarget,
-        system_prompt: str,
-        message_value: str,
-        message_data_type: PromptDataType,
-        scored_prompt_id: str | UUID,
-        prepended_text_message_piece: str | None = None,
-        category: str | UUID | None = None,
-        objective: str | None = None,
-        score_value_output_key: str = "score_value",
-        rationale_output_key: str = "rationale",
-        description_output_key: str = "description",
-        metadata_output_key: str = "metadata",
-        category_output_key: str = "category",
-        response_json_schema: JsonSchemaDefinition | None = None,
-    ) -> UnvalidatedScore:
-        score: UnvalidatedScore | None = None
-        try:
-            score = await super()._score_value_with_llm_async(
-                prompt_target=prompt_target,
-                system_prompt=system_prompt,
-                message_value=message_value,
-                message_data_type=message_data_type,
-                scored_prompt_id=scored_prompt_id,
-                prepended_text_message_piece=prepended_text_message_piece,
-                category=category,
-                objective=objective,
-                score_value_output_key=score_value_output_key,
-                rationale_output_key=rationale_output_key,
-                description_output_key=description_output_key,
-                metadata_output_key=metadata_output_key,
-                category_output_key=category_output_key,
-                response_json_schema=response_json_schema,
-            )
-            if score is None:
-                raise ValueError("Score returned None")
-            # raise an exception if it's not parsable as a float
-            float(score.raw_score_value)
-        except ValueError:
-            score_value = score.raw_score_value if score else "None"
-            raise InvalidJsonException(
-                message=(f"Invalid JSON response, score_value should be a float not this: {score_value}")
-            ) from None
-        return score
diff --git a/pyrit/score/llm_scoring.py b/pyrit/score/llm_scoring.py
new file mode 100644
index 0000000000..3dd52e60f7
--- /dev/null
+++ b/pyrit/score/llm_scoring.py
@@ -0,0 +1,179 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from __future__ import annotations
+
+import uuid
+from typing import TYPE_CHECKING, Any
+
+from pyrit.exceptions import InvalidJsonException, pyrit_json_retry
+from pyrit.models import JSON_SCHEMA_METADATA_KEY, Message, MessagePiece
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from pyrit.models import (
+        ComponentIdentifier,
+        JsonSchemaDefinition,
+        PromptDataType,
+        UnvalidatedScore,
+    )
+    from pyrit.prompt_target import PromptTarget
+    from pyrit.score.response_handler import ResponseHandler
+
+
+async def _run_llm_scoring_async(
+    *,
+    chat_target: PromptTarget,
+    system_prompt: str,
+    response_handler: ResponseHandler,
+    value: str,
+    data_type: PromptDataType,
+    scored_prompt_id: str | uuid.UUID,
+    scorer_identifier: ComponentIdentifier,
+    prepended_text: str | None = None,
+    category: Sequence[str] | str | None = None,
+    objective: str | None = None,
+    response_json_schema: JsonSchemaDefinition | None = None,
+    numeric_value: bool = False,
+) -> UnvalidatedScore:
+    """
+    Perform a single scoring round-trip against an LLM target and parse the result.
+
+    This is the shared LLM evaluation mechanism: it sets the system prompt on the target,
+    sends the value to be scored, applies the standard JSON retry behavior, and delegates
+    parsing to ``response_handler``. It is intentionally stateless and independent of any
+    particular ``Scorer`` so that scorers can compose it without inheriting LLM machinery.
+
+    Args:
+        chat_target (PromptTarget): The target LLM to send the message to.
+        system_prompt (str): The system-level prompt that guides the target LLM.
+        response_handler (ResponseHandler): Parser that turns the target's raw text into an
+            ``UnvalidatedScore``.
+        value (str): The content to be scored (e.g. text, image path, audio path).
+        data_type (PromptDataType): The data type of ``value`` (e.g. "text", "image_path").
+        scored_prompt_id (str | uuid.UUID): The ID of the message piece being scored.
+        scorer_identifier (ComponentIdentifier): Identifier of the calling scorer, stored on
+            the resulting score.
+        prepended_text (str | None): Text context to prepend before ``value`` as a separate
+            piece. Useful for adding objective/context when scoring non-text content.
+            Defaults to None.
+        category (Sequence[str] | str | None): The category of the score. May instead be parsed
+            from the response; supplying both is an error. Defaults to None.
+        objective (str | None): The objective associated with the score, used for
+            contextualizing the result. Defaults to None.
+        response_json_schema (JsonSchemaDefinition | None): Optional JSON schema constraining the
+            response. Forwarded to the request metadata; targets that natively support JSON
+            schemas enforce it, others have it omitted by normalization. Defaults to None.
+        numeric_value (bool): When True, the parsed ``raw_score_value`` must be parsable as a
+            float; otherwise an ``InvalidJsonException`` is raised (without retrying). Defaults
+            to False.
+
+    Returns:
+        UnvalidatedScore: The parsed score, whose ``raw_score_value`` still needs to be
+            normalized and validated by the caller.
+
+    Raises:
+        InvalidJsonException: If the response is not valid JSON, is missing required keys, or
+            (when ``numeric_value`` is True) the score value is not a float.
+        Exception: For other unexpected errors during scoring.
+    """
+    score = await _send_and_parse_async(
+        chat_target=chat_target,
+        system_prompt=system_prompt,
+        response_handler=response_handler,
+        value=value,
+        data_type=data_type,
+        scored_prompt_id=scored_prompt_id,
+        scorer_identifier=scorer_identifier,
+        prepended_text=prepended_text,
+        category=category,
+        objective=objective,
+        response_json_schema=response_json_schema,
+    )
+
+    if numeric_value:
+        try:
+            # Reject a score value that is not parsable as a float. The check runs outside the
+            # retry-decorated ``_send_and_parse_async``, so a non-numeric response is not retried.
+            # float() raises TypeError (not ValueError) for values such as None; map both.
+            float(score.raw_score_value)
+        except (TypeError, ValueError):
+            raise InvalidJsonException(
+                message=f"Invalid JSON response, score_value should be a float not this: {score.raw_score_value}"
+            ) from None
+
+    return score
+
+
+@pyrit_json_retry
+async def _send_and_parse_async(
+    *,
+    chat_target: PromptTarget,
+    system_prompt: str,
+    response_handler: ResponseHandler,
+    value: str,
+    data_type: PromptDataType,
+    scored_prompt_id: str | uuid.UUID,
+    scorer_identifier: ComponentIdentifier,
+    prepended_text: str | None = None,
+    category: Sequence[str] | str | None = None,
+    objective: str | None = None,
+    response_json_schema: JsonSchemaDefinition | None = None,
+) -> UnvalidatedScore:
+    """
+    Send one scoring request to ``chat_target`` and parse the reply with ``response_handler``.
+
+    Raises:
+        InvalidJsonException: If the reply has no text piece (``response_handler`` may raise it too).
+    """
+    conversation_id = str(uuid.uuid4())
+    chat_target.set_system_prompt(system_prompt=system_prompt, conversation_id=conversation_id)
+    prompt_metadata: dict[str, Any] = {"response_format": "json"}
+    if response_json_schema is not None:
+        # Always forward the schema; targets that cannot natively enforce it have it omitted by normalization.
+        prompt_metadata[JSON_SCHEMA_METADATA_KEY] = response_json_schema
+
+    # Prepended text context goes first (if provided), then the main message piece being scored.
+    message_pieces: list[MessagePiece] = []
+    if prepended_text:
+        message_pieces.append(
+            MessagePiece(
+                role="user",
+                original_value=prepended_text,
+                original_value_data_type="text",
+                converted_value_data_type="text",
+                conversation_id=conversation_id,
+                prompt_metadata=prompt_metadata,
+            )
+        )
+    message_pieces.append(
+        MessagePiece(
+            role="user",
+            original_value=value,
+            original_value_data_type=data_type,
+            converted_value_data_type=data_type,
+            conversation_id=conversation_id,
+            prompt_metadata=prompt_metadata,
+        )
+    )
+
+    scorer_llm_request = Message(message_pieces=message_pieces)
+    try:
+        response = await chat_target.send_prompt_async(message=scorer_llm_request)
+    except Exception as ex:
+        raise Exception(f"Error scoring prompt with original prompt ID: {scored_prompt_id}") from ex
+
+    # The text piece holds the scorer's JSON reply. Use a None default: a bare next() would raise
+    # StopIteration, which a coroutine surfaces as an opaque RuntimeError.
+    text_piece = next((p for p in response[0].message_pieces if p.converted_value_data_type == "text"), None)
+    if text_piece is None:
+        raise InvalidJsonException(message="Invalid JSON response: scorer target returned no text piece")
+
+    return response_handler.parse(
+        response_text=text_piece.converted_value,
+        scorer_identifier=scorer_identifier,
+        scored_prompt_id=scored_prompt_id,
+        category=category,
+        objective=objective,
+    )
diff --git a/pyrit/score/response_handler.py b/pyrit/score/response_handler.py
new file mode 100644
index 0000000000..fd6fe50df0
--- /dev/null
+++ b/pyrit/score/response_handler.py
@@ -0,0 +1,182 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from __future__ import annotations
+
+import abc
+import json
+from abc import abstractmethod
+from typing import TYPE_CHECKING
+
+from pyrit.exceptions import InvalidJsonException, remove_markdown_json
+from pyrit.models import UnvalidatedScore
+
+if TYPE_CHECKING:
+    import uuid
+    from collections.abc import Sequence
+
+    from pyrit.models import ComponentIdentifier
+
+
+class ResponseHandler(abc.ABC):
+    """
+    Abstract parser that turns the raw text a scoring target returned into an ``UnvalidatedScore``.
+
+    A ResponseHandler owns response parsing and nothing else: given the text produced by a
+    scoring LLM, it produces the unvalidated score object the scorer expects. It does not
+    perform the LLM round-trip, build the system prompt, or decide how the resulting score
+    branches. Subclasses implement ``parse`` for one wire format each (e.g. JSON today).
+    """
+
+    @abstractmethod
+    def parse(
+        self,
+        *,
+        response_text: str,
+        scorer_identifier: ComponentIdentifier,
+        scored_prompt_id: str | uuid.UUID,
+        category: Sequence[str] | str | None = None,
+        objective: str | None = None,
+    ) -> UnvalidatedScore:
+        """
+        Parse raw target output into an ``UnvalidatedScore``. All arguments are keyword-only.
+
+        Args:
+            response_text (str): The raw text returned by the scoring target.
+            scorer_identifier (ComponentIdentifier): Identifier of the scorer that produced the
+                request, stored on the resulting score.
+            scored_prompt_id (str | uuid.UUID): The ID of the message piece being scored.
+            category (Sequence[str] | str | None): The category of the score. May instead be parsed
+                from the response; supplying both is an error. Defaults to None.
+            objective (str | None): The objective associated with the score, used for
+                contextualizing the result. Defaults to None.
+
+        Returns:
+            UnvalidatedScore: The parsed score, whose ``raw_score_value`` still needs to be
+                normalized and validated by the caller.
+        """
+        ...
+
+
+class JsonSchemaResponseHandler(ResponseHandler):
+    """
+    Default ResponseHandler that parses JSON scoring responses.
+
+    Reproduces PyRIT's historical scoring-response parsing: strip any markdown code fences,
+    ``json.loads`` the text, then read the score value, rationale, optional description,
+    category, and metadata from configurable keys.
+    """
+
+    def __init__(
+        self,
+        *,
+        score_value_output_key: str = "score_value",
+        rationale_output_key: str = "rationale",
+        description_output_key: str = "description",
+        metadata_output_key: str = "metadata",
+        category_output_key: str = "category",
+    ) -> None:
+        """
+        Initialize the handler with the JSON keys to read from the response.
+
+        Args:
+            score_value_output_key (str): Key holding the score value. Defaults to "score_value".
+            rationale_output_key (str): Key holding the rationale. Defaults to "rationale".
+            description_output_key (str): Key holding the description. Defaults to "description".
+            metadata_output_key (str): Key holding the metadata. Defaults to "metadata".
+            category_output_key (str): Key holding the category. Defaults to "category".
+        """
+        self._score_value_output_key = score_value_output_key
+        self._rationale_output_key = rationale_output_key
+        self._description_output_key = description_output_key
+        self._metadata_output_key = metadata_output_key
+        self._category_output_key = category_output_key
+
+    def parse(
+        self,
+        *,
+        response_text: str,
+        scorer_identifier: ComponentIdentifier,
+        scored_prompt_id: str | uuid.UUID,
+        category: Sequence[str] | str | None = None,
+        objective: str | None = None,
+    ) -> UnvalidatedScore:
+        """
+        Parse a JSON scoring response into an ``UnvalidatedScore``.
+
+        Args:
+            response_text (str): The raw text returned by the scoring target.
+            scorer_identifier (ComponentIdentifier): Identifier of the scorer that produced the
+                request, stored on the resulting score.
+            scored_prompt_id (str | uuid.UUID): The ID of the message piece being scored.
+            category (Sequence[str] | str | None): The category of the score. May instead be parsed
+                from the response; supplying both is an error. Defaults to None.
+            objective (str | None): The objective associated with the score, used for
+                contextualizing the result. Defaults to None.
+
+        Returns:
+            UnvalidatedScore: The parsed score, whose ``raw_score_value`` still needs to be
+                normalized and validated by the caller.
+
+        Raises:
+            ValueError: If a category is present in both the response and the argument, or the
+                parsed category is not a string or a list of strings.
+            InvalidJsonException: If the response is not valid JSON, is not a JSON object, or lacks a required key.
+        """
+        response_json = remove_markdown_json(response_text)
+        try:
+            parsed_response = json.loads(response_json)
+            if not isinstance(parsed_response, dict):
+                # Valid JSON that is not an object (list, string, number, null) has no keys to read.
+                raise InvalidJsonException(message=f"Invalid JSON response, expected an object: {response_json}")
+            category_response = parsed_response.get(self._category_output_key)
+
+            if category_response and category:
+                raise ValueError("Category is present in the response and an argument")
+
+            # Validate and normalize category to a list of strings (a tuple argument is accepted too)
+            cat_val = category_response if category_response is not None else category
+            normalized_category: list[str] | None
+            if cat_val is None:
+                normalized_category = None
+            elif isinstance(cat_val, str):
+                normalized_category = [cat_val]
+            elif isinstance(cat_val, (list, tuple)):
+                if not all(isinstance(x, str) for x in cat_val):
+                    raise ValueError("'category' must be a string or a list of strings")
+                normalized_category = list(cat_val)
+            else:
+                raise ValueError("'category' must be a string or a list of strings")
+
+            # Normalize metadata to a dictionary with string keys and string/int/float values
+            raw_md = parsed_response.get(self._metadata_output_key)
+            normalized_md: dict[str, str | int | float] | None
+            if raw_md is None:
+                normalized_md = None
+            elif isinstance(raw_md, dict):
+                # Coerce keys to str and keep str/int/float values only; an empty result stays an empty dict
+                normalized_md = {str(k): v for k, v in raw_md.items() if isinstance(v, (str, int, float))}
+            elif isinstance(raw_md, (str, int, float)):
+                # Wrap primitive metadata into a namespaced field
+                normalized_md = {"metadata": raw_md}
+            else:
+                # Unrecognized metadata shape; drop to avoid downstream errors
+                normalized_md = None
+
+            score = UnvalidatedScore(
+                raw_score_value=str(parsed_response[self._score_value_output_key]),
+                score_value_description=parsed_response.get(self._description_output_key),
+                score_category=normalized_category,
+                score_rationale=parsed_response[self._rationale_output_key],
+                scorer_class_identifier=scorer_identifier,
+                score_metadata=normalized_md,
+                message_piece_id=scored_prompt_id,
+                objective=objective,
+            )
+        except json.JSONDecodeError:
+            raise InvalidJsonException(message=f"Invalid JSON response: {response_json}") from None
+
+        except KeyError:
+            raise InvalidJsonException(message=f"Invalid JSON response, missing Key: {response_json}") from None
+
+        return score
diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py
index 72c948cbc3..f0d249c4ad 100644
--- a/pyrit/score/scorer.py
+++ b/pyrit/score/scorer.py
@@ -5,9 +5,7 @@
 
 import abc
 import asyncio
-import json
 import logging
-import uuid
 from abc import abstractmethod
 from typing import (
     TYPE_CHECKING,
@@ -16,15 +14,9 @@
     cast,
 )
 
-from pyrit.exceptions import (
-    InvalidJsonException,
-    PyritException,
-    pyrit_json_retry,
-    remove_markdown_json,
-)
+from pyrit.exceptions import PyritException
 from pyrit.memory import CentralMemory, MemoryInterface
 from pyrit.models import (
-    JSON_SCHEMA_METADATA_KEY,
     ChatMessageRole,
     ComponentIdentifier,
     Identifiable,
@@ -40,6 +32,8 @@
 )
 from pyrit.prompt_target.batch_helper import batch_task_async
 from pyrit.prompt_target.common.target_requirements import TargetRequirements
+from pyrit.score.llm_scoring import _run_llm_scoring_async
+from pyrit.score.response_handler import JsonSchemaResponseHandler
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -668,7 +662,6 @@ def scale_value_float(self, value: float, min_value: float, max_value: float) ->
 
         return (value - min_value) / (max_value - min_value)
 
-    @pyrit_json_retry
     async def _score_value_with_llm_async(
         self,
         *,
@@ -690,6 +683,9 @@ async def _score_value_with_llm_async(
         """
         Send a request to a target, and take care of retries.
 
+        This is a thin internal forwarder to ``_run_llm_scoring_async``. It remains only so that
+        scorers that have not yet been migrated to compose the helper directly keep working.
+
         The scorer target response should be JSON with value, rationale, and optional metadata and
         description fields.
 
@@ -733,117 +729,29 @@ async def _score_value_with_llm_async(
             InvalidJsonException: If the response is not valid JSON.
             Exception: For other unexpected errors during scoring.
         """
-        conversation_id = str(uuid.uuid4())
-
-        prompt_target.set_system_prompt(
-            system_prompt=system_prompt,
-            conversation_id=conversation_id,
+        response_handler = JsonSchemaResponseHandler(
+            score_value_output_key=score_value_output_key,
+            rationale_output_key=rationale_output_key,
+            description_output_key=description_output_key,
+            metadata_output_key=metadata_output_key,
+            category_output_key=category_output_key,
         )
-        prompt_metadata: dict[str, Any] = {"response_format": "json"}
-        if response_json_schema is not None:
-            # Always forward the schema; the target's normalization pipeline omits it
-            # when the target cannot natively enforce a JSON schema.
-            prompt_metadata[JSON_SCHEMA_METADATA_KEY] = response_json_schema
-
-        # Build message pieces - prepended text context first (if provided), then the main message being scored
-        message_pieces: list[MessagePiece] = []
-
-        # Add prepended text context piece if provided (e.g., objective context for non-text scoring)
-        if prepended_text_message_piece:
-            message_pieces.append(
-                MessagePiece(
-                    role="user",
-                    original_value=prepended_text_message_piece,
-                    original_value_data_type="text",
-                    converted_value_data_type="text",
-                    conversation_id=conversation_id,
-                    prompt_metadata=prompt_metadata,
-                )
-            )
 
-        # Add the main message piece being scored
-        message_pieces.append(
-            MessagePiece(
-                role="user",
-                original_value=message_value,
-                original_value_data_type=message_data_type,
-                converted_value_data_type=message_data_type,
-                conversation_id=conversation_id,
-                prompt_metadata=prompt_metadata,
-            )
+        return await _run_llm_scoring_async(
+            chat_target=prompt_target,
+            system_prompt=system_prompt,
+            response_handler=response_handler,
+            value=message_value,
+            data_type=message_data_type,
+            scored_prompt_id=scored_prompt_id,
+            scorer_identifier=self.get_identifier(),
+            prepended_text=prepended_text_message_piece,
+            category=category,
+            objective=objective,
+            response_json_schema=response_json_schema,
+            numeric_value=getattr(self, "_score_value_is_numeric", False),
         )
 
-        scorer_llm_request = Message(message_pieces=message_pieces)
-        try:
-            response = await prompt_target.send_prompt_async(message=scorer_llm_request)
-        except Exception as ex:
-            raise Exception(f"Error scoring prompt with original prompt ID: {scored_prompt_id}") from ex
-
-        response_json: str = ""
-        try:
-            # Get the text piece which contains the JSON response containing the score_value and rationale from the LLM
-            text_piece = next(
-                piece for piece in response[0].message_pieces if piece.converted_value_data_type == "text"
-            )
-            response_json = text_piece.converted_value
-
-            response_json = remove_markdown_json(response_json)
-            parsed_response = json.loads(response_json)
-            category_response = parsed_response.get(category_output_key)
-
-            if category_response and category:
-                raise ValueError("Category is present in the response and an argument")
-
-            # Validate and normalize category to a list of strings
-            cat_val = category_response if category_response is not None else category
-            normalized_category: list[str] | None
-            if cat_val is None:
-                normalized_category = None
-            elif isinstance(cat_val, str):
-                normalized_category = [cat_val]
-            elif isinstance(cat_val, list):
-                if not all(isinstance(x, str) for x in cat_val):
-                    raise ValueError("'category' must be a string or a list of strings")
-                normalized_category = cat_val  # type: ignore[ty:invalid-assignment]
-            else:
-                # JSON must yield either a string or a list of strings
-                raise ValueError("'category' must be a string or a list of strings")
-
-            # Normalize metadata to a dictionary with string keys and string/int/float values
-            raw_md = parsed_response.get(metadata_output_key)
-            normalized_md: dict[str, str | int | float] | None
-            if raw_md is None:
-                normalized_md = None
-            elif isinstance(raw_md, dict):
-                # Coerce keys to str and filter to str/int/float values only
-                normalized_md = {str(k): v for k, v in raw_md.items() if isinstance(v, (str, int, float))}
-                # If dictionary becomes empty after filtering, keep as empty dict
-            elif isinstance(raw_md, (str, int, float)):
-                # Wrap primitive metadata into a namespaced field
-                normalized_md = {"metadata": raw_md}
-            else:
-                # Unrecognized metadata shape; drop to avoid downstream errors
-                normalized_md = None
-
-            score = UnvalidatedScore(
-                raw_score_value=str(parsed_response[score_value_output_key]),
-                score_value_description=parsed_response.get(description_output_key),
-                score_category=normalized_category,
-                score_rationale=parsed_response[rationale_output_key],
-                scorer_class_identifier=self.get_identifier(),
-                score_metadata=normalized_md,
-                message_piece_id=scored_prompt_id,
-                objective=objective,
-            )
-
-        except json.JSONDecodeError:
-            raise InvalidJsonException(message=f"Invalid JSON response: {response_json}") from None
-
-        except KeyError:
-            raise InvalidJsonException(message=f"Invalid JSON response, missing Key: {response_json}") from None
-
-        return score
-
     def _extract_objective_from_response(self, response: Message) -> str:
         """
         Extract an objective from the response using the last request (if it exists).
diff --git a/pyrit/score/true_false/self_ask_true_false_scorer.py b/pyrit/score/true_false/self_ask_true_false_scorer.py
index 08f28b5795..b1988d18a1 100644
--- a/pyrit/score/true_false/self_ask_true_false_scorer.py
+++ b/pyrit/score/true_false/self_ask_true_false_scorer.py
@@ -12,6 +12,8 @@
 from pyrit.common.path import SCORER_SEED_PROMPT_PATH
 from pyrit.models import ComponentIdentifier, MessagePiece, Score, SeedPrompt
 from pyrit.prompt_target import CHAT_TARGET_REQUIREMENTS, PromptTarget
+from pyrit.score.llm_scoring import _run_llm_scoring_async
+from pyrit.score.response_handler import JsonSchemaResponseHandler
 from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
 from pyrit.score.true_false.true_false_score_aggregator import (
     TrueFalseAggregatorFunc,
@@ -223,13 +225,15 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: st
             scoring_value = f"objective: {objective}\nresponse: {message_piece.converted_value}"
             scoring_data_type = "text"
 
-        unvalidated_score = await self._score_value_with_llm_async(
-            prompt_target=self._prompt_target,
+        unvalidated_score = await _run_llm_scoring_async(
+            chat_target=self._prompt_target,
             system_prompt=self._system_prompt,
-            message_value=scoring_value,
-            message_data_type=scoring_data_type,
+            response_handler=JsonSchemaResponseHandler(),
+            value=scoring_value,
+            data_type=scoring_data_type,
             scored_prompt_id=message_piece.id,
-            prepended_text_message_piece=prepended_text,
+            scorer_identifier=self.get_identifier(),
+            prepended_text=prepended_text,
             category=self._score_category,
             objective=objective,
             response_json_schema=self._response_json_schema,
diff --git a/tests/unit/score/test_scorer.py b/tests/unit/score/test_scorer.py
index 6491ccefeb..741c41135b 100644
--- a/tests/unit/score/test_scorer.py
+++ b/tests/unit/score/test_scorer.py
@@ -239,7 +239,9 @@ async def test_scorer_remove_markdown_json_called(good_json):
 
     scorer = MockScorer()
 
-    with patch("pyrit.score.scorer.remove_markdown_json", wraps=remove_markdown_json) as mock_remove_markdown_json:
+    with patch(
+        "pyrit.score.response_handler.remove_markdown_json", wraps=remove_markdown_json
+    ) as mock_remove_markdown_json:
         await scorer._score_value_with_llm_async(
             prompt_target=chat_target,
             system_prompt="system_prompt",