Skip to content

Commit a3793c3

Browse files
Update openapi.yaml
1 parent e6b87ac commit a3793c3

1 file changed

Lines changed: 81 additions & 19 deletions

File tree

openapi.yaml

Lines changed: 81 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6582,6 +6582,7 @@ components:
65826582
type: string
65836583
enum: [classify, score, compare]
65846584
description: The type of evaluation to perform
6585+
example: 'classify'
65856586
parameters:
65866587
oneOf:
65876588
- $ref: '#/components/schemas/EvaluationClassifyParameters'
@@ -6605,17 +6606,20 @@ components:
66056606
type: string
66066607
minItems: 2
66076608
description: List of possible classification labels
6609+
example: ["yes", "no"]
66086610
pass_labels:
66096611
type: array
66106612
items:
66116613
type: string
66126614
minItems: 1
66136615
description: List of labels that are considered passing
6616+
example: ["yes"]
66146617
model_to_evaluate:
66156618
$ref: '#/components/schemas/EvaluationModelOrString'
66166619
input_data_file_path:
66176620
type: string
66186621
description: Data file ID
6622+
example: 'file-1234-aefd'
66196623

66206624
EvaluationScoreParameters:
66216625
type: object
@@ -6631,19 +6635,23 @@ components:
66316635
min_score:
66326636
type: number
66336637
format: float
6638+
example: 0.0
66346639
description: Minimum possible score
66356640
max_score:
66366641
type: number
66376642
format: float
6643+
example: 10.0
66386644
description: Maximum possible score
66396645
pass_threshold:
66406646
type: number
66416647
format: float
6648+
example: 7.0
66426649
description: Score threshold for passing
66436650
model_to_evaluate:
66446651
$ref: '#/components/schemas/EvaluationModelOrString'
66456652
input_data_file_path:
66466653
type: string
6654+
example: 'file-01234567890123456789'
66476655
description: Data file ID
66486656

66496657
EvaluationCompareParameters:
@@ -6671,9 +6679,11 @@ components:
66716679
model_name:
66726680
type: string
66736681
description: Name of the judge model
6682+
example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
66746683
system_template:
66756684
type: string
66766685
description: System prompt template for the judge
6686+
example: 'Imagine you are a helpful assistant'
66776687

66786688
EvaluationModelOrString:
66796689
oneOf:
@@ -6693,22 +6703,27 @@ components:
66936703
model_name:
66946704
type: string
66956705
description: Name of the model to evaluate
6706+
example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
66966707
max_tokens:
66976708
type: integer
66986709
minimum: 1
66996710
description: Maximum number of tokens to generate
6711+
example: 512
67006712
temperature:
67016713
type: number
67026714
format: float
67036715
minimum: 0
67046716
maximum: 2
67056717
description: Sampling temperature
6718+
example: 0.7
67066719
system_template:
67076720
type: string
67086721
description: System prompt template
6722+
example: 'Imagine you are a helpful assistant'
67096723
input_template:
67106724
type: string
67116725
description: Input prompt template
6726+
example: 'Please classify {{prompt}} based on the labels below'
67126727

67136728
EvaluationResponse:
67146729
type: object
@@ -6727,17 +6742,20 @@ components:
67276742
workflow_id:
67286743
type: string
67296744
description: The evaluation job ID
6745+
example: 'eval-1234aedf'
67306746
type:
67316747
type: string
67326748
enum: [classify, score, compare]
67336749
description: The type of evaluation
6750+
example: classify
67346751
owner_id:
67356752
type: string
67366753
description: ID of the job owner (admin only)
67376754
status:
67386755
type: string
67396756
enum: [pending, queued, running, completed, error, user_error]
67406757
description: Current status of the job
6758+
example: pending
67416759
status_updates:
67426760
type: array
67436761
items:
@@ -6750,10 +6768,12 @@ components:
67506768
type: string
67516769
format: date-time
67526770
description: When the job was created
6771+
example: '2025-07-23T17:10:04.837888Z'
67536772
updated_at:
67546773
type: string
67556774
format: date-time
67566775
description: When the job was last updated
6776+
example: '2025-07-23T17:10:04.837888Z'
67576777
results:
67586778
oneOf:
67596779
- $ref: '#/components/schemas/EvaluationClassifyResults'
@@ -6772,13 +6792,16 @@ components:
67726792
status:
67736793
type: string
67746794
description: The status at this update
6795+
example: pending
67756796
message:
67766797
type: string
67776798
description: Additional message for this update
6799+
example: Job is pending evaluation
67786800
timestamp:
67796801
type: string
67806802
format: date-time
67816803
description: When this update occurred
6804+
example: '2025-07-23T17:10:04.837888Z'
67826805

67836806
EvaluationClassifyResults:
67846807
type: object
@@ -6788,43 +6811,70 @@ components:
67886811
additionalProperties:
67896812
type: number
67906813
description: Count of samples for each label
6791-
yes_pct:
6814+
generation_fail_count:
67926815
type: number
6793-
format: float
6816+
format: integer
6817+
nullable: true
6818+
description: Number of failed generations.
6819+
example: 0
6820+
judge_fail_count:
6821+
type: number
6822+
format: integer
67946823
nullable: true
6795-
description: Percentage of 'yes' classifications
6796-
no_pct:
6824+
description: Number of failed judge generations
6825+
example: 0
6826+
invalid_label_count:
67976827
type: number
67986828
format: float
67996829
nullable: true
6800-
description: Percentage of 'no' classifications
6801-
result_file:
6830+
description: Number of invalid labels
6831+
example: 0
6832+
result_file_id:
68026833
type: string
68036834
description: Data File ID
6804-
classification_report:
6805-
type: object
6835+
example: file-1234-aefd
6836+
pass_percentage:
6837+
type: number
6838+
format: integer
68066839
nullable: true
6807-
description: Detailed classification metrics
6840+
description: Percentage of pass labels.
6841+
example: 10
6842+
label_counts:
6843+
type: string
6844+
description: JSON string representing label counts
6845+
example: '{"yes": 10, "no": 0}'
6846+
68086847

68096848
EvaluationScoreResults:
68106849
type: object
68116850
properties:
6812-
scores:
6813-
type: array
6814-
items:
6815-
type: number
6816-
format: float
6817-
description: Individual scores for each sample
68186851
aggregated_scores:
68196852
type: object
68206853
description: Aggregated score statistics
6821-
additional_metrics:
6822-
type: object
6854+
generation_fail_count:
6855+
type: number
6856+
format: integer
6857+
nullable: true
6858+
description: Number of failed generations.
6859+
example: 0
6860+
judge_fail_count:
6861+
type: number
6862+
format: integer
68236863
nullable: true
6824-
description: Additional evaluation metrics
6825-
result_file:
6864+
description: Number of failed judge generations
6865+
example: 0
6866+
invalid_score_count:
6867+
type: number
6868+
format: integer
6869+
description: Number of invalid scores generated from model
6870+
failed_samples:
6871+
type: number
6872+
format: integer
6873+
description: Number of failed samples generated from model
6874+
result_file_id:
68266875
type: string
68276876
description: Data File ID
6877+
example: file-1234-aefd
68286878

68296879
EvaluationCompareResults:
68306880
type: object
@@ -6841,6 +6891,18 @@ components:
68416891
Ties:
68426892
type: integer
68436893
description: Number of ties
6894+
generation_fail_count:
6895+
type: number
6896+
format: integer
6897+
nullable: true
6898+
description: Number of failed generations.
6899+
example: 0
6900+
judge_fail_count:
6901+
type: number
6902+
format: integer
6903+
nullable: true
6904+
description: Number of failed judge generations
6905+
example: 0
68446906
result_file_id:
68456907
type: string
68466908
description: Data File ID

0 commit comments

Comments
 (0)