@@ -6582,6 +6582,7 @@ components:
65826582 type : string
65836583 enum : [classify, score, compare]
65846584 description : The type of evaluation to perform
6585+ example : ' classify'
65856586 parameters :
65866587 oneOf :
65876588 - $ref : ' #/components/schemas/EvaluationClassifyParameters'
@@ -6605,17 +6606,20 @@ components:
66056606 type : string
66066607 minItems : 2
66076608 description : List of possible classification labels
6609+ example : ["yes", "no"]
66086610 pass_labels :
66096611 type : array
66106612 items :
66116613 type : string
66126614 minItems : 1
66136615 description : List of labels that are considered passing
6616+ example : ["yes"]
66146617 model_to_evaluate :
66156618 $ref : ' #/components/schemas/EvaluationModelOrString'
66166619 input_data_file_path :
66176620 type : string
66186621 description : Data file ID
6622+ example : ' file-1234-aefd'
66196623
66206624 EvaluationScoreParameters :
66216625 type : object
@@ -6631,19 +6635,23 @@ components:
66316635 min_score :
66326636 type : number
66336637 format : float
6638+ example : 0.0
66346639 description : Minimum possible score
66356640 max_score :
66366641 type : number
66376642 format : float
6643+ example : 10.0
66386644 description : Maximum possible score
66396645 pass_threshold :
66406646 type : number
66416647 format : float
6648+ example : 7.0
66426649 description : Score threshold for passing
66436650 model_to_evaluate :
66446651 $ref : ' #/components/schemas/EvaluationModelOrString'
66456652 input_data_file_path :
66466653 type : string
6654+ example : ' file-01234567890123456789'
66476655 description : Data file ID
66486656
66496657 EvaluationCompareParameters :
@@ -6671,9 +6679,11 @@ components:
66716679 model_name :
66726680 type : string
66736681 description : Name of the judge model
6682+ example : ' meta-llama/Llama-3-70B-Instruct-Turbo'
66746683 system_template :
66756684 type : string
66766685 description : System prompt template for the judge
6686+ example : ' Imagine you are a helpful assistant'
66776687
66786688 EvaluationModelOrString :
66796689 oneOf :
@@ -6693,22 +6703,27 @@ components:
66936703 model_name :
66946704 type : string
66956705 description : Name of the model to evaluate
6706+ example : ' meta-llama/Llama-3-70B-Instruct-Turbo'
66966707 max_tokens :
66976708 type : integer
66986709 minimum : 1
66996710 description : Maximum number of tokens to generate
6711+ example : 512
67006712 temperature :
67016713 type : number
67026714 format : float
67036715 minimum : 0
67046716 maximum : 2
67056717 description : Sampling temperature
6718+ example : 0.7
67066719 system_template :
67076720 type : string
67086721 description : System prompt template
6722+ example : ' Imagine you are a helpful assistant'
67096723 input_template :
67106724 type : string
67116725 description : Input prompt template
6726+ example : ' Please classify {{prompt}} based on the labels below'
67126727
67136728 EvaluationResponse :
67146729 type : object
@@ -6727,17 +6742,20 @@ components:
67276742 workflow_id :
67286743 type : string
67296744 description : The evaluation job ID
6745+ example : ' eval-1234aedf'
67306746 type :
67316747 type : string
67326748 enum : [classify, score, compare]
67336749 description : The type of evaluation
6750+ example : classify
67346751 owner_id :
67356752 type : string
67366753 description : ID of the job owner (admin only)
67376754 status :
67386755 type : string
67396756 enum : [pending, queued, running, completed, error, user_error]
67406757 description : Current status of the job
6758+ example : pending
67416759 status_updates :
67426760 type : array
67436761 items :
@@ -6750,10 +6768,12 @@ components:
67506768 type : string
67516769 format : date-time
67526770 description : When the job was created
6771+ example : ' 2025-07-23T17:10:04.837888Z'
67536772 updated_at :
67546773 type : string
67556774 format : date-time
67566775 description : When the job was last updated
6776+ example : ' 2025-07-23T17:10:04.837888Z'
67576777 results :
67586778 oneOf :
67596779 - $ref : ' #/components/schemas/EvaluationClassifyResults'
@@ -6772,13 +6792,16 @@ components:
67726792 status :
67736793 type : string
67746794 description : The status at this update
6795+ example : pending
67756796 message :
67766797 type : string
67776798 description : Additional message for this update
6799+ example : Job is pending evaluation
67786800 timestamp :
67796801 type : string
67806802 format : date-time
67816803 description : When this update occurred
6804+ example : ' 2025-07-23T17:10:04.837888Z'
67826805
67836806 EvaluationClassifyResults :
67846807 type : object
@@ -6788,43 +6811,70 @@ components:
67886811 additionalProperties :
67896812 type : number
67906813 description : Count of samples for each label
6791- yes_pct :
6814+ generation_fail_count :
67926815 type : number
6793- format : float
6816+ format : integer
6817+ nullable : true
6818+ description : Number of failed generations.
6819+ example : 0
6820+ judge_fail_count :
6821+ type : number
6822+ format : integer
67946823 nullable : true
6795- description : Percentage of 'yes' classifications
6796- no_pct :
6824+ description : Number of failed judge generations
6825+ example : 0
6826+ invalid_label_count :
67976827 type : number
67986828 format : float
67996829 nullable : true
6800- description : Percentage of 'no' classifications
6801- result_file :
6830+ description : Number of invalid labels
6831+ example : 0
6832+ result_file_id :
68026833 type : string
68036834 description : Data File ID
6804- classification_report :
6805- type : object
6835+ example : file-1234-aefd
6836+ pass_percentage :
6837+ type : number
6838+ format : integer
68066839 nullable : true
6807- description : Detailed classification metrics
6840+ description : Percentage of pass labels.
6841+ example : 10
6842+ label_counts :
6843+ type : string
6844+ description : JSON string representing label counts
6845+ example : ' {"yes": 10, "no": 0}'
6846+
68086847
68096848 EvaluationScoreResults :
68106849 type : object
68116850 properties :
6812- scores :
6813- type : array
6814- items :
6815- type : number
6816- format : float
6817- description : Individual scores for each sample
68186851 aggregated_scores :
68196852 type : object
68206853 description : Aggregated score statistics
6821- additional_metrics :
6822- type : object
6854+ generation_fail_count :
6855+ type : number
6856+ format : integer
6857+ nullable : true
6858+ description : Number of failed generations.
6859+ example : 0
6860+ judge_fail_count :
6861+ type : number
6862+ format : integer
68236863 nullable : true
6824- description : Additional evaluation metrics
6825- result_file :
6864+ description : Number of failed judge generations
6865+ example : 0
6866+ invalid_score_count :
6867+ type : number
6868+ format : integer
6869+ description : Number of invalid scores generated by the model
6870+ failed_samples :
6871+ type : number
6872+ format : integer
6873+ description : Number of failed samples generated by the model
6874+ result_file_id :
68266875 type : string
68276876 description : Data File ID
6877+ example : file-1234-aefd
68286878
68296879 EvaluationCompareResults :
68306880 type : object
@@ -6841,6 +6891,18 @@ components:
68416891 Ties :
68426892 type : integer
68436893 description : Number of ties
6894+ generation_fail_count :
6895+ type : number
6896+ format : integer
6897+ nullable : true
6898+ description : Number of failed generations.
6899+ example : 0
6900+ judge_fail_count :
6901+ type : number
6902+ format : integer
6903+ nullable : true
6904+ description : Number of failed judge generations
6905+ example : 0
68446906 result_file_id :
68456907 type : string
68466908 description : Data File ID
0 commit comments