From a876261b05b6ef67f53f49e350f8c1aca04d3c6b Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Wed, 1 Jul 2026 23:40:06 -0700 Subject: [PATCH 1/3] Refactor scorer LLM round-trip into ResponseHandler + run_llm_scoring_async PR A of the scorer-architecture refactor (Option 5: scorer-owned composition). Moves the LLM evaluation mechanism and JSON response parsing off the base Scorer class so the base no longer carries LLM-only machinery (retry, system-prompt setting, JSON parsing). - Add pyrit/score/response_handler.py: ResponseHandler ABC + JsonSchemaResponseHandler (response parsing) reproducing the existing JSON parsing exactly. - Add pyrit/score/llm_scoring.py: stateless module-level run_llm_scoring_async (evaluation mechanism) wrapping an inner @pyrit_json_retry round-trip; the optional numeric-value float check runs outside the retry, preserving the old FloatScaleScorer behavior. - Convert Scorer._score_value_with_llm_async into a deprecated thin shim that forwards to run_llm_scoring_async (removed_in 0.17.0). Signature unchanged, so the eight not-yet-migrated scorers keep working (migrated in PR C). - Remove the FloatScaleScorer._score_value_with_llm_async override; replace it with a _score_value_is_numeric class flag the shim reads to apply the float check. - Wire SelfAskTrueFalseScorer to call run_llm_scoring_async directly. Public constructor and behavior unchanged. - Export ResponseHandler, JsonSchemaResponseHandler, run_llm_scoring_async from pyrit.score (additive). - Update test_scorer_remove_markdown_json_called patch target to the new module. No public API change. All existing score tests pass; lints clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyrit/score/__init__.py | 5 + pyrit/score/float_scale/float_scale_scorer.py | 59 +----- pyrit/score/llm_scoring.py | 179 +++++++++++++++++ pyrit/score/response_handler.py | 182 ++++++++++++++++++ pyrit/score/scorer.py | 148 +++----------- .../true_false/self_ask_true_false_scorer.py | 14 +- tests/unit/score/test_scorer.py | 4 +- 7 files changed, 415 insertions(+), 176 deletions(-) create mode 100644 pyrit/score/llm_scoring.py create mode 100644 pyrit/score/response_handler.py diff --git a/pyrit/score/__init__.py b/pyrit/score/__init__.py index 9924ea7904..f8c5f3dd5b 100644 --- a/pyrit/score/__init__.py +++ b/pyrit/score/__init__.py @@ -25,6 +25,8 @@ from pyrit.score.float_scale.self_ask_general_float_scale_scorer import SelfAskGeneralFloatScaleScorer from pyrit.score.float_scale.self_ask_likert_scorer import LikertScaleEvalFiles, LikertScalePaths, SelfAskLikertScorer from pyrit.score.float_scale.self_ask_scale_scorer import SelfAskScaleScorer +from pyrit.score.llm_scoring import run_llm_scoring_async +from pyrit.score.response_handler import JsonSchemaResponseHandler, ResponseHandler from pyrit.score.scorer import Scorer from pyrit.score.scorer_evaluation.metrics_type import MetricsType, RegistryUpdateBehavior from pyrit.score.scorer_evaluation.scorer_metrics import ( @@ -143,6 +145,7 @@ def __getattr__(name: str) -> object: "HumanLabeledDataset", "HumanLabeledEntry", "InsecureCodeScorer", + "JsonSchemaResponseHandler", "LikertScaleEvalFiles", "LikertScalePaths", "MarkdownInjectionScorer", @@ -159,6 +162,8 @@ def __getattr__(name: str) -> object: "QuestionAnswerScorer", "RegexScorer", "RegistryUpdateBehavior", + "ResponseHandler", + "run_llm_scoring_async", "Scorer", "ScorerEvalDatasetFiles", "ScorerEvaluator", diff --git a/pyrit/score/float_scale/float_scale_scorer.py b/pyrit/score/float_scale/float_scale_scorer.py index 7dbe8cf29f..e5600fbd1d 100644 --- a/pyrit/score/float_scale/float_scale_scorer.py +++ b/pyrit/score/float_scale/float_scale_scorer.py @@ -3,21 +3,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar -from pyrit.exceptions.exception_classes import InvalidJsonException from pyrit.models import ( - JsonSchemaDefinition, Message, - PromptDataType, Score, - UnvalidatedScore, ) from pyrit.score.scorer import Scorer if TYPE_CHECKING: - from uuid import UUID - from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.score.scorer_evaluation.scorer_metrics import HarmScorerMetrics from pyrit.score.scorer_prompt_validator import ScorerPromptValidator @@ -44,6 +38,10 @@ class FloatScaleScorer(Scorer): "blocked = True") should override ``_score_piece_async`` or ``_build_fallback_score``. """ + # Marks scores produced by this scorer as numeric so the shared LLM round-trip validates that + # the returned score value is parsable as a float. Float-scale scorers require this. + _score_value_is_numeric: ClassVar[bool] = True + def __init__(self, *, validator: ScorerPromptValidator, chat_target: PromptTarget | None = None) -> None: """ Initialize the FloatScaleScorer. @@ -138,50 +136,3 @@ def get_scorer_metrics(self) -> HarmScorerMetrics | None: eval_hash=eval_hash, harm_category=self.evaluation_file_mapping.harm_category, ) - - async def _score_value_with_llm_async( - self, - *, - prompt_target: PromptTarget, - system_prompt: str, - message_value: str, - message_data_type: PromptDataType, - scored_prompt_id: str | UUID, - prepended_text_message_piece: str | None = None, - category: str | UUID | None = None, - objective: str | None = None, - score_value_output_key: str = "score_value", - rationale_output_key: str = "rationale", - description_output_key: str = "description", - metadata_output_key: str = "metadata", - category_output_key: str = "category", - response_json_schema: JsonSchemaDefinition | None = None, - ) -> UnvalidatedScore: - score: UnvalidatedScore | None = None - try: - score = await super()._score_value_with_llm_async( - prompt_target=prompt_target, - system_prompt=system_prompt, - message_value=message_value, - message_data_type=message_data_type, - scored_prompt_id=scored_prompt_id, - prepended_text_message_piece=prepended_text_message_piece, - category=category, - objective=objective, - score_value_output_key=score_value_output_key, - rationale_output_key=rationale_output_key, - description_output_key=description_output_key, - metadata_output_key=metadata_output_key, - category_output_key=category_output_key, - response_json_schema=response_json_schema, - ) - if score is None: - raise ValueError("Score returned None") - # raise an exception if it's not parsable as a float - float(score.raw_score_value) - except ValueError: - score_value = score.raw_score_value if score else "None" - raise InvalidJsonException( - message=(f"Invalid JSON response, score_value should be a float not this: {score_value}") - ) from None - return score diff --git a/pyrit/score/llm_scoring.py b/pyrit/score/llm_scoring.py new file mode 100644 index 0000000000..d905cf8518 --- /dev/null +++ b/pyrit/score/llm_scoring.py @@ -0,0 +1,179 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +from __future__ import annotations + +import uuid +from typing import TYPE_CHECKING, Any + +from pyrit.exceptions import InvalidJsonException, pyrit_json_retry +from pyrit.models import JSON_SCHEMA_METADATA_KEY, Message, MessagePiece + +if TYPE_CHECKING: + from collections.abc import Sequence + + from pyrit.models import ( + ComponentIdentifier, + JsonSchemaDefinition, + PromptDataType, + UnvalidatedScore, + ) + from pyrit.prompt_target import PromptTarget + from pyrit.score.response_handler import ResponseHandler + + +async def run_llm_scoring_async( + *, + target: PromptTarget, + system_prompt: str, + response_handler: ResponseHandler, + value: str, + data_type: PromptDataType, + scored_prompt_id: str | uuid.UUID, + scorer_identifier: ComponentIdentifier, + prepended_text: str | None = None, + category: Sequence[str] | str | None = None, + objective: str | None = None, + response_json_schema: JsonSchemaDefinition | None = None, + numeric_value: bool = False, +) -> UnvalidatedScore: + """ + Perform a single scoring round-trip against an LLM target and parse the result. + + This is the shared LLM evaluation mechanism: it sets the system prompt on the target, + sends the value to be scored, applies the standard JSON retry behavior, and delegates + parsing to ``response_handler``. It is intentionally stateless and independent of any + particular ``Scorer`` so that scorers can compose it without inheriting LLM machinery. + + Args: + target (PromptTarget): The target LLM to send the message to. + system_prompt (str): The system-level prompt that guides the target LLM. + response_handler (ResponseHandler): Parser that turns the target's raw text into an + ``UnvalidatedScore``. + value (str): The content to be scored (e.g. text, image path, audio path). + data_type (PromptDataType): The data type of ``value`` (e.g. "text", "image_path"). + scored_prompt_id (str | uuid.UUID): The ID of the message piece being scored. + scorer_identifier (ComponentIdentifier): Identifier of the calling scorer, stored on + the resulting score. + prepended_text (str | None): Text context to prepend before ``value`` as a separate + piece. Useful for adding objective/context when scoring non-text content. + Defaults to None. + category (Sequence[str] | str | None): The category of the score. May instead be parsed + from the response; supplying both is an error. Defaults to None. + objective (str | None): The objective associated with the score, used for + contextualizing the result. Defaults to None. + response_json_schema (JsonSchemaDefinition | None): Optional JSON schema constraining the + response. Forwarded to the request metadata; targets that natively support JSON + schemas enforce it, others have it omitted by normalization. Defaults to None. + numeric_value (bool): When True, the parsed ``raw_score_value`` must be parsable as a + float; otherwise an ``InvalidJsonException`` is raised (without retrying). Defaults + to False. + + Returns: + UnvalidatedScore: The parsed score, whose ``raw_score_value`` still needs to be + normalized and validated by the caller. + + Raises: + InvalidJsonException: If the response is not valid JSON, is missing required keys, or + (when ``numeric_value`` is True) the score value is not a float. + Exception: For other unexpected errors during scoring. + """ + score = await _send_and_parse_async( + target=target, + system_prompt=system_prompt, + response_handler=response_handler, + value=value, + data_type=data_type, + scored_prompt_id=scored_prompt_id, + scorer_identifier=scorer_identifier, + prepended_text=prepended_text, + category=category, + objective=objective, + response_json_schema=response_json_schema, + ) + + if numeric_value: + try: + # Raise an exception if the score value is not parsable as a float. This mirrors the + # historical float-scale behavior: the check runs outside the JSON retry, so a + # well-formed-but-non-numeric response is not retried. + float(score.raw_score_value) + except ValueError: + raise InvalidJsonException( + message=f"Invalid JSON response, score_value should be a float not this: {score.raw_score_value}" + ) from None + + return score + + +@pyrit_json_retry +async def _send_and_parse_async( + *, + target: PromptTarget, + system_prompt: str, + response_handler: ResponseHandler, + value: str, + data_type: PromptDataType, + scored_prompt_id: str | uuid.UUID, + scorer_identifier: ComponentIdentifier, + prepended_text: str | None = None, + category: Sequence[str] | str | None = None, + objective: str | None = None, + response_json_schema: JsonSchemaDefinition | None = None, +) -> UnvalidatedScore: + conversation_id = str(uuid.uuid4()) + + target.set_system_prompt( + system_prompt=system_prompt, + conversation_id=conversation_id, + ) + prompt_metadata: dict[str, Any] = {"response_format": "json"} + if response_json_schema is not None: + # Always forward the schema; the target's normalization pipeline omits it + # when the target cannot natively enforce a JSON schema. + prompt_metadata[JSON_SCHEMA_METADATA_KEY] = response_json_schema + + # Build message pieces - prepended text context first (if provided), then the main message being scored + message_pieces: list[MessagePiece] = [] + + # Add prepended text context piece if provided (e.g., objective context for non-text scoring) + if prepended_text: + message_pieces.append( + MessagePiece( + role="user", + original_value=prepended_text, + original_value_data_type="text", + converted_value_data_type="text", + conversation_id=conversation_id, + prompt_metadata=prompt_metadata, + ) + ) + + # Add the main message piece being scored + message_pieces.append( + MessagePiece( + role="user", + original_value=value, + original_value_data_type=data_type, + converted_value_data_type=data_type, + conversation_id=conversation_id, + prompt_metadata=prompt_metadata, + ) + ) + + scorer_llm_request = Message(message_pieces=message_pieces) + try: + response = await target.send_prompt_async(message=scorer_llm_request) + except Exception as ex: + raise Exception(f"Error scoring prompt with original prompt ID: {scored_prompt_id}") from ex + + # Get the text piece which contains the JSON response containing the score_value and rationale from the LLM + text_piece = next(piece for piece in response[0].message_pieces if piece.converted_value_data_type == "text") + + return response_handler.parse( + response_text=text_piece.converted_value, + scorer_identifier=scorer_identifier, + scored_prompt_id=scored_prompt_id, + category=category, + objective=objective, + ) diff --git a/pyrit/score/response_handler.py b/pyrit/score/response_handler.py new file mode 100644 index 0000000000..fd6fe50df0 --- /dev/null +++ b/pyrit/score/response_handler.py @@ -0,0 +1,182 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +from __future__ import annotations + +import abc +import json +from abc import abstractmethod +from typing import TYPE_CHECKING + +from pyrit.exceptions import InvalidJsonException, remove_markdown_json +from pyrit.models import UnvalidatedScore + +if TYPE_CHECKING: + import uuid + from collections.abc import Sequence + + from pyrit.models import ComponentIdentifier + + +class ResponseHandler(abc.ABC): + """ + Turns the raw text a scoring target returned into an ``UnvalidatedScore``. + + A ResponseHandler owns response parsing and nothing else: given the text produced by a + scoring LLM, it produces the unvalidated score object the scorer expects. It does not + perform the LLM round-trip, build the system prompt, or decide how the resulting score + branches. Different handlers implement different wire formats (e.g. JSON today). + """ + + @abstractmethod + def parse( + self, + *, + response_text: str, + scorer_identifier: ComponentIdentifier, + scored_prompt_id: str | uuid.UUID, + category: Sequence[str] | str | None = None, + objective: str | None = None, + ) -> UnvalidatedScore: + """ + Parse raw target output into an ``UnvalidatedScore``. + + Args: + response_text (str): The raw text returned by the scoring target. + scorer_identifier (ComponentIdentifier): Identifier of the scorer that produced the + request, stored on the resulting score. + scored_prompt_id (str | uuid.UUID): The ID of the message piece being scored. + category (Sequence[str] | str | None): The category of the score. May instead be parsed + from the response; supplying both is an error. Defaults to None. + objective (str | None): The objective associated with the score, used for + contextualizing the result. Defaults to None. + + Returns: + UnvalidatedScore: The parsed score, whose ``raw_score_value`` still needs to be + normalized and validated by the caller. + """ + ... + + +class JsonSchemaResponseHandler(ResponseHandler): + """ + Default ResponseHandler that parses JSON scoring responses. + + Reproduces PyRIT's historical scoring-response parsing: strip any markdown code fences, + ``json.loads`` the text, then read the score value, rationale, optional description, + category, and metadata from configurable keys. + """ + + def __init__( + self, + *, + score_value_output_key: str = "score_value", + rationale_output_key: str = "rationale", + description_output_key: str = "description", + metadata_output_key: str = "metadata", + category_output_key: str = "category", + ) -> None: + """ + Initialize the handler with the JSON keys to read from the response. + + Args: + score_value_output_key (str): Key holding the score value. Defaults to "score_value". + rationale_output_key (str): Key holding the rationale. Defaults to "rationale". + description_output_key (str): Key holding the description. Defaults to "description". + metadata_output_key (str): Key holding the metadata. Defaults to "metadata". + category_output_key (str): Key holding the category. Defaults to "category". + """ + self._score_value_output_key = score_value_output_key + self._rationale_output_key = rationale_output_key + self._description_output_key = description_output_key + self._metadata_output_key = metadata_output_key + self._category_output_key = category_output_key + + def parse( + self, + *, + response_text: str, + scorer_identifier: ComponentIdentifier, + scored_prompt_id: str | uuid.UUID, + category: Sequence[str] | str | None = None, + objective: str | None = None, + ) -> UnvalidatedScore: + """ + Parse a JSON scoring response into an ``UnvalidatedScore``. + + Args: + response_text (str): The raw text returned by the scoring target. + scorer_identifier (ComponentIdentifier): Identifier of the scorer that produced the + request, stored on the resulting score. + scored_prompt_id (str | uuid.UUID): The ID of the message piece being scored. + category (Sequence[str] | str | None): The category of the score. May instead be parsed + from the response; supplying both is an error. Defaults to None. + objective (str | None): The objective associated with the score, used for + contextualizing the result. Defaults to None. + + Returns: + UnvalidatedScore: The parsed score, whose ``raw_score_value`` still needs to be + normalized and validated by the caller. + + Raises: + ValueError: If a category is present in both the response and the argument, or the + parsed category is not a string or a list of strings. + InvalidJsonException: If the response is not valid JSON or is missing a required key. + """ + response_json = remove_markdown_json(response_text) + try: + parsed_response = json.loads(response_json) + category_response = parsed_response.get(self._category_output_key) + + if category_response and category: + raise ValueError("Category is present in the response and an argument") + + # Validate and normalize category to a list of strings + cat_val = category_response if category_response is not None else category + normalized_category: list[str] | None + if cat_val is None: + normalized_category = None + elif isinstance(cat_val, str): + normalized_category = [cat_val] + elif isinstance(cat_val, list): + if not all(isinstance(x, str) for x in cat_val): + raise ValueError("'category' must be a string or a list of strings") + normalized_category = cat_val # type: ignore[ty:invalid-assignment] + else: + # JSON must yield either a string or a list of strings + raise ValueError("'category' must be a string or a list of strings") + + # Normalize metadata to a dictionary with string keys and string/int/float values + raw_md = parsed_response.get(self._metadata_output_key) + normalized_md: dict[str, str | int | float] | None + if raw_md is None: + normalized_md = None + elif isinstance(raw_md, dict): + # Coerce keys to str and filter to str/int/float values only + normalized_md = {str(k): v for k, v in raw_md.items() if isinstance(v, (str, int, float))} + # If dictionary becomes empty after filtering, keep as empty dict + elif isinstance(raw_md, (str, int, float)): + # Wrap primitive metadata into a namespaced field + normalized_md = {"metadata": raw_md} + else: + # Unrecognized metadata shape; drop to avoid downstream errors + normalized_md = None + + score = UnvalidatedScore( + raw_score_value=str(parsed_response[self._score_value_output_key]), + score_value_description=parsed_response.get(self._description_output_key), + score_category=normalized_category, + score_rationale=parsed_response[self._rationale_output_key], + scorer_class_identifier=scorer_identifier, + score_metadata=normalized_md, + message_piece_id=scored_prompt_id, + objective=objective, + ) + + except json.JSONDecodeError: + raise InvalidJsonException(message=f"Invalid JSON response: {response_json}") from None + + except KeyError: + raise InvalidJsonException(message=f"Invalid JSON response, missing Key: {response_json}") from None + + return score diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index 72c948cbc3..937fa1d0d0 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -5,9 +5,7 @@ import abc import asyncio -import json import logging -import uuid from abc import abstractmethod from typing import ( TYPE_CHECKING, @@ -16,15 +14,10 @@ cast, ) -from pyrit.exceptions import ( - InvalidJsonException, - PyritException, - pyrit_json_retry, - remove_markdown_json, -) +from pyrit.common.deprecation import print_deprecation_message +from pyrit.exceptions import PyritException from pyrit.memory import CentralMemory, MemoryInterface from pyrit.models import ( - JSON_SCHEMA_METADATA_KEY, ChatMessageRole, ComponentIdentifier, Identifiable, @@ -40,6 +33,8 @@ ) from pyrit.prompt_target.batch_helper import batch_task_async from pyrit.prompt_target.common.target_requirements import TargetRequirements +from pyrit.score.llm_scoring import run_llm_scoring_async +from pyrit.score.response_handler import JsonSchemaResponseHandler if TYPE_CHECKING: from collections.abc import Sequence @@ -668,7 +663,6 @@ def scale_value_float(self, value: float, min_value: float, max_value: float) -> return (value - min_value) / (max_value - min_value) - @pyrit_json_retry async def _score_value_with_llm_async( self, *, @@ -690,6 +684,10 @@ async def _score_value_with_llm_async( """ Send a request to a target, and take care of retries. + .. deprecated:: 0.17.0 + Use ``run_llm_scoring_async`` with a ``ResponseHandler`` instead. This method forwards + to that helper and will be removed in 0.17.0. + The scorer target response should be JSON with value, rationale, and optional metadata and description fields. @@ -733,116 +731,34 @@ async def _score_value_with_llm_async( InvalidJsonException: If the response is not valid JSON. Exception: For other unexpected errors during scoring. """ - conversation_id = str(uuid.uuid4()) - - prompt_target.set_system_prompt( - system_prompt=system_prompt, - conversation_id=conversation_id, + print_deprecation_message( + old_item="pyrit.score.scorer.Scorer._score_value_with_llm_async", + new_item="pyrit.score.llm_scoring.run_llm_scoring_async", + removed_in="0.17.0", ) - prompt_metadata: dict[str, Any] = {"response_format": "json"} - if response_json_schema is not None: - # Always forward the schema; the target's normalization pipeline omits it - # when the target cannot natively enforce a JSON schema. - prompt_metadata[JSON_SCHEMA_METADATA_KEY] = response_json_schema - - # Build message pieces - prepended text context first (if provided), then the main message being scored - message_pieces: list[MessagePiece] = [] - - # Add prepended text context piece if provided (e.g., objective context for non-text scoring) - if prepended_text_message_piece: - message_pieces.append( - MessagePiece( - role="user", - original_value=prepended_text_message_piece, - original_value_data_type="text", - converted_value_data_type="text", - conversation_id=conversation_id, - prompt_metadata=prompt_metadata, - ) - ) - # Add the main message piece being scored - message_pieces.append( - MessagePiece( - role="user", - original_value=message_value, - original_value_data_type=message_data_type, - converted_value_data_type=message_data_type, - conversation_id=conversation_id, - prompt_metadata=prompt_metadata, - ) + response_handler = JsonSchemaResponseHandler( + score_value_output_key=score_value_output_key, + rationale_output_key=rationale_output_key, + description_output_key=description_output_key, + metadata_output_key=metadata_output_key, + category_output_key=category_output_key, ) - scorer_llm_request = Message(message_pieces=message_pieces) - try: - response = await prompt_target.send_prompt_async(message=scorer_llm_request) - except Exception as ex: - raise Exception(f"Error scoring prompt with original prompt ID: {scored_prompt_id}") from ex - - response_json: str = "" - try: - # Get the text piece which contains the JSON response containing the score_value and rationale from the LLM - text_piece = next( - piece for piece in response[0].message_pieces if piece.converted_value_data_type == "text" - ) - response_json = text_piece.converted_value - - response_json = remove_markdown_json(response_json) - parsed_response = json.loads(response_json) - category_response = parsed_response.get(category_output_key) - - if category_response and category: - raise ValueError("Category is present in the response and an argument") - - # Validate and normalize category to a list of strings - cat_val = category_response if category_response is not None else category - normalized_category: list[str] | None - if cat_val is None: - normalized_category = None - elif isinstance(cat_val, str): - normalized_category = [cat_val] - elif isinstance(cat_val, list): - if not all(isinstance(x, str) for x in cat_val): - raise ValueError("'category' must be a string or a list of strings") - normalized_category = cat_val # type: ignore[ty:invalid-assignment] - else: - # JSON must yield either a string or a list of strings - raise ValueError("'category' must be a string or a list of strings") - - # Normalize metadata to a dictionary with string keys and string/int/float values - raw_md = parsed_response.get(metadata_output_key) - normalized_md: dict[str, str | int | float] | None - if raw_md is None: - normalized_md = None - elif isinstance(raw_md, dict): - # Coerce keys to str and filter to str/int/float values only - normalized_md = {str(k): v for k, v in raw_md.items() if isinstance(v, (str, int, float))} - # If dictionary becomes empty after filtering, keep as empty dict - elif isinstance(raw_md, (str, int, float)): - # Wrap primitive metadata into a namespaced field - normalized_md = {"metadata": raw_md} - else: - # Unrecognized metadata shape; drop to avoid downstream errors - normalized_md = None - - score = UnvalidatedScore( - raw_score_value=str(parsed_response[score_value_output_key]), - score_value_description=parsed_response.get(description_output_key), - score_category=normalized_category, - score_rationale=parsed_response[rationale_output_key], - scorer_class_identifier=self.get_identifier(), - score_metadata=normalized_md, - message_piece_id=scored_prompt_id, - objective=objective, - ) - - except json.JSONDecodeError: - raise InvalidJsonException(message=f"Invalid JSON response: {response_json}") from None - - except KeyError: - raise InvalidJsonException(message=f"Invalid JSON response, missing Key: {response_json}") from None - - return score + return await run_llm_scoring_async( + target=prompt_target, + system_prompt=system_prompt, + response_handler=response_handler, + value=message_value, + data_type=message_data_type, + scored_prompt_id=scored_prompt_id, + scorer_identifier=self.get_identifier(), + prepended_text=prepended_text_message_piece, + category=category, + objective=objective, + response_json_schema=response_json_schema, + numeric_value=getattr(self, "_score_value_is_numeric", False), + ) def _extract_objective_from_response(self, response: Message) -> str: """ diff --git a/pyrit/score/true_false/self_ask_true_false_scorer.py b/pyrit/score/true_false/self_ask_true_false_scorer.py index 08f28b5795..14a3621c2a 100644 --- a/pyrit/score/true_false/self_ask_true_false_scorer.py +++ b/pyrit/score/true_false/self_ask_true_false_scorer.py @@ -12,6 +12,8 @@ from pyrit.common.path import SCORER_SEED_PROMPT_PATH from pyrit.models import ComponentIdentifier, MessagePiece, Score, SeedPrompt from pyrit.prompt_target import CHAT_TARGET_REQUIREMENTS, PromptTarget +from pyrit.score.llm_scoring import run_llm_scoring_async +from pyrit.score.response_handler import JsonSchemaResponseHandler from pyrit.score.scorer_prompt_validator import ScorerPromptValidator from pyrit.score.true_false.true_false_score_aggregator import ( TrueFalseAggregatorFunc, @@ -223,13 +225,15 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: st scoring_value = f"objective: {objective}\nresponse: {message_piece.converted_value}" scoring_data_type = "text" - unvalidated_score = await self._score_value_with_llm_async( - prompt_target=self._prompt_target, + unvalidated_score = await run_llm_scoring_async( + target=self._prompt_target, system_prompt=self._system_prompt, - message_value=scoring_value, - message_data_type=scoring_data_type, + response_handler=JsonSchemaResponseHandler(), + value=scoring_value, + data_type=scoring_data_type, scored_prompt_id=message_piece.id, - prepended_text_message_piece=prepended_text, + scorer_identifier=self.get_identifier(), + prepended_text=prepended_text, category=self._score_category, objective=objective, response_json_schema=self._response_json_schema, diff --git a/tests/unit/score/test_scorer.py b/tests/unit/score/test_scorer.py index 6491ccefeb..741c41135b 100644 --- a/tests/unit/score/test_scorer.py +++ b/tests/unit/score/test_scorer.py @@ -239,7 +239,9 @@ async def test_scorer_remove_markdown_json_called(good_json): scorer = MockScorer() - with patch("pyrit.score.scorer.remove_markdown_json", wraps=remove_markdown_json) as mock_remove_markdown_json: + with patch( + "pyrit.score.response_handler.remove_markdown_json", wraps=remove_markdown_json + ) as mock_remove_markdown_json: await scorer._score_value_with_llm_async( prompt_target=chat_target, system_prompt="system_prompt", From 4f4b911c2170c149f5048731a66cb36582fcdd8d Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Thu, 2 Jul 2026 10:33:24 -0700 Subject: [PATCH 2/3] Keep chat_target kwarg and drop scorer deprecation warning For the upcoming v1.0.0 clean breaking change, PR A keeps `chat_target` (renaming it to `target` was an unnecessary break) and ships no deprecation warnings. - run_llm_scoring_async / _send_and_parse_async: rename the `target` parameter back to `chat_target`; update both call sites (the base Scorer forwarder and SelfAskTrueFalseScorer). - Scorer._score_value_with_llm_async: remove the print_deprecation_message call and its import. It is now a plain, warning-free internal forwarder kept only as a transitional shim until PR C migrates the remaining scorers and deletes it entirely. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyrit/score/llm_scoring.py | 12 ++++++------ pyrit/score/scorer.py | 14 +++----------- .../score/true_false/self_ask_true_false_scorer.py | 2 +- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/pyrit/score/llm_scoring.py b/pyrit/score/llm_scoring.py index d905cf8518..d52c98d4b2 100644 --- a/pyrit/score/llm_scoring.py +++ b/pyrit/score/llm_scoring.py @@ -24,7 +24,7 @@ async def run_llm_scoring_async( *, - target: PromptTarget, + chat_target: PromptTarget, system_prompt: str, response_handler: ResponseHandler, value: str, @@ -46,7 +46,7 @@ async def run_llm_scoring_async( particular ``Scorer`` so that scorers can compose it without inheriting LLM machinery. Args: - target (PromptTarget): The target LLM to send the message to. + chat_target (PromptTarget): The target LLM to send the message to. system_prompt (str): The system-level prompt that guides the target LLM. response_handler (ResponseHandler): Parser that turns the target's raw text into an ``UnvalidatedScore``. @@ -79,7 +79,7 @@ async def run_llm_scoring_async( Exception: For other unexpected errors during scoring. """ score = await _send_and_parse_async( - target=target, + chat_target=chat_target, system_prompt=system_prompt, response_handler=response_handler, value=value, @@ -109,7 +109,7 @@ async def run_llm_scoring_async( @pyrit_json_retry async def _send_and_parse_async( *, - target: PromptTarget, + chat_target: PromptTarget, system_prompt: str, response_handler: ResponseHandler, value: str, @@ -123,7 +123,7 @@ async def _send_and_parse_async( ) -> UnvalidatedScore: conversation_id = str(uuid.uuid4()) - target.set_system_prompt( + chat_target.set_system_prompt( system_prompt=system_prompt, conversation_id=conversation_id, ) @@ -163,7 +163,7 @@ async def _send_and_parse_async( scorer_llm_request = Message(message_pieces=message_pieces) try: - response = await target.send_prompt_async(message=scorer_llm_request) + response = await chat_target.send_prompt_async(message=scorer_llm_request) except Exception as ex: raise Exception(f"Error scoring prompt with original prompt ID: {scored_prompt_id}") from ex diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index 937fa1d0d0..20b8690245 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -14,7 +14,6 @@ cast, ) -from pyrit.common.deprecation import print_deprecation_message from pyrit.exceptions import PyritException from pyrit.memory import CentralMemory, MemoryInterface from pyrit.models import ( @@ -684,9 +683,8 @@ async def _score_value_with_llm_async( """ Send a request to a target, and take care of retries. - .. deprecated:: 0.17.0 - Use ``run_llm_scoring_async`` with a ``ResponseHandler`` instead. This method forwards - to that helper and will be removed in 0.17.0. + This is a thin internal forwarder to ``run_llm_scoring_async``. It remains only so that + scorers that have not yet been migrated to compose the helper directly keep working. The scorer target response should be JSON with value, rationale, and optional metadata and description fields. @@ -731,12 +729,6 @@ async def _score_value_with_llm_async( InvalidJsonException: If the response is not valid JSON. Exception: For other unexpected errors during scoring. """ - print_deprecation_message( - old_item="pyrit.score.scorer.Scorer._score_value_with_llm_async", - new_item="pyrit.score.llm_scoring.run_llm_scoring_async", - removed_in="0.17.0", - ) - response_handler = JsonSchemaResponseHandler( score_value_output_key=score_value_output_key, rationale_output_key=rationale_output_key, @@ -746,7 +738,7 @@ async def _score_value_with_llm_async( ) return await run_llm_scoring_async( - target=prompt_target, + chat_target=prompt_target, system_prompt=system_prompt, response_handler=response_handler, value=message_value, diff --git a/pyrit/score/true_false/self_ask_true_false_scorer.py b/pyrit/score/true_false/self_ask_true_false_scorer.py index 14a3621c2a..33e0855087 100644 --- a/pyrit/score/true_false/self_ask_true_false_scorer.py +++ b/pyrit/score/true_false/self_ask_true_false_scorer.py @@ -226,7 +226,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: st scoring_data_type = "text" unvalidated_score = await run_llm_scoring_async( - target=self._prompt_target, + chat_target=self._prompt_target, system_prompt=self._system_prompt, response_handler=JsonSchemaResponseHandler(), value=scoring_value, From d3e8bf5b56abd989c2e84a07886259839ad53dc9 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Thu, 2 Jul 2026 21:36:29 -0700 Subject: [PATCH 3/3] Make LLM scoring helper internal (_run_llm_scoring_async) PR A is an internal refactor with no public API change, so the shared round-trip helper should not look like public API. Rename run_llm_scoring_async -> _run_llm_scoring_async (matching the already-private _send_and_parse_async) and drop it from pyrit.score's __init__ import and __all__. Both internal callers import it directly from pyrit.score.llm_scoring. ResponseHandler / JsonSchemaResponseHandler stay exported as the new composition abstraction. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyrit/score/__init__.py | 2 -- pyrit/score/llm_scoring.py | 2 +- pyrit/score/scorer.py | 6 +++--- pyrit/score/true_false/self_ask_true_false_scorer.py | 4 ++-- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pyrit/score/__init__.py b/pyrit/score/__init__.py index f8c5f3dd5b..011d1ba6ae 100644 --- a/pyrit/score/__init__.py +++ b/pyrit/score/__init__.py @@ -25,7 +25,6 @@ from pyrit.score.float_scale.self_ask_general_float_scale_scorer import SelfAskGeneralFloatScaleScorer from pyrit.score.float_scale.self_ask_likert_scorer import LikertScaleEvalFiles, LikertScalePaths, SelfAskLikertScorer from pyrit.score.float_scale.self_ask_scale_scorer import SelfAskScaleScorer -from pyrit.score.llm_scoring import run_llm_scoring_async from pyrit.score.response_handler import JsonSchemaResponseHandler, ResponseHandler from pyrit.score.scorer import Scorer from pyrit.score.scorer_evaluation.metrics_type import MetricsType, RegistryUpdateBehavior @@ -163,7 +162,6 @@ def __getattr__(name: str) -> object: "RegexScorer", "RegistryUpdateBehavior", "ResponseHandler", - "run_llm_scoring_async", "Scorer", "ScorerEvalDatasetFiles", "ScorerEvaluator", diff --git a/pyrit/score/llm_scoring.py b/pyrit/score/llm_scoring.py index d52c98d4b2..3dd52e60f7 100644 --- a/pyrit/score/llm_scoring.py +++ b/pyrit/score/llm_scoring.py @@ -22,7 +22,7 @@ from pyrit.score.response_handler import ResponseHandler -async def run_llm_scoring_async( +async def _run_llm_scoring_async( *, chat_target: PromptTarget, system_prompt: str, diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index 20b8690245..f0d249c4ad 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -32,7 +32,7 @@ ) from pyrit.prompt_target.batch_helper import batch_task_async from pyrit.prompt_target.common.target_requirements import TargetRequirements -from pyrit.score.llm_scoring import run_llm_scoring_async +from pyrit.score.llm_scoring import _run_llm_scoring_async from pyrit.score.response_handler import JsonSchemaResponseHandler if TYPE_CHECKING: @@ -683,7 +683,7 @@ async def _score_value_with_llm_async( """ Send a request to a target, and take care of retries. - This is a thin internal forwarder to ``run_llm_scoring_async``. It remains only so that + This is a thin internal forwarder to ``_run_llm_scoring_async``. It remains only so that scorers that have not yet been migrated to compose the helper directly keep working. The scorer target response should be JSON with value, rationale, and optional metadata and @@ -737,7 +737,7 @@ async def _score_value_with_llm_async( category_output_key=category_output_key, ) - return await run_llm_scoring_async( + return await _run_llm_scoring_async( chat_target=prompt_target, system_prompt=system_prompt, response_handler=response_handler, diff --git a/pyrit/score/true_false/self_ask_true_false_scorer.py b/pyrit/score/true_false/self_ask_true_false_scorer.py index 33e0855087..b1988d18a1 100644 --- a/pyrit/score/true_false/self_ask_true_false_scorer.py +++ b/pyrit/score/true_false/self_ask_true_false_scorer.py @@ -12,7 +12,7 @@ from pyrit.common.path import SCORER_SEED_PROMPT_PATH from pyrit.models import ComponentIdentifier, MessagePiece, Score, SeedPrompt from pyrit.prompt_target import CHAT_TARGET_REQUIREMENTS, PromptTarget -from pyrit.score.llm_scoring import run_llm_scoring_async +from pyrit.score.llm_scoring import _run_llm_scoring_async from pyrit.score.response_handler import JsonSchemaResponseHandler from pyrit.score.scorer_prompt_validator import ScorerPromptValidator from pyrit.score.true_false.true_false_score_aggregator import ( @@ -225,7 +225,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: st scoring_value = f"objective: {objective}\nresponse: {message_piece.converted_value}" scoring_data_type = "text" - unvalidated_score = await run_llm_scoring_async( + unvalidated_score = await _run_llm_scoring_async( chat_target=self._prompt_target, system_prompt=self._system_prompt, response_handler=JsonSchemaResponseHandler(),