@@ -3385,153 +3385,6 @@ paths:
33853385 schema :
33863386 $ref : ' #/components/schemas/BatchErrorResponse'
33873387
3388- /evaluation :
3389- post :
3390- tags : ['Evaluations']
3391- summary : Create a new evaluation job
3392- description : Creates a new evaluation job for classify, score, or compare tasks
3393- operationId : evaluation-create
3394- x-stainless-resource-name : evaluationCreate
3395- x-codeSamples :
3396- - lang : Python
3397- label : Together AI SDK (Python)
3398- source : |
3399- from together import Together
3400- import os
3401-
3402- client = Together(
3403- api_key=os.environ.get("TOGETHER_API_KEY"),
3404- )
3405-
3406- client.evaluation.create(
3407- type="classify",
3408- judge_model_name="meta-llama/Llama-3.2-3B-Instruct-Turbo",
3409- judge_system_template="You are a helpful assistant which can classify",
3410- input_data_file_path="file-1234-5678-abcd",
3411- labels=["Toxic", "Non-Toxic"],
3412- pass_labels=["Toxic"],
3413- model_to_evaluate={
3414- "name": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
3415- "system_template": "You are a helpful assistant which can classify",
3416- "input_template": "please classify '{{prompt}}' into one of these two categories",
3417- "max_tokens": 512,
3418- "temperature": 0.7
3419- }
3420- )
3421- - lang : Shell
3422- label : cURL
3423- source : |
3424- curl --location 'https://api.together.xyz/v1/evaluation' \
3425- --header 'Content-Type: application/json' \
3426- --header "Authorization: Bearer $TOGETHER_API_KEY" \
3427- --data '{
3428- "type": "classify",
3429- "parameters": {
3430- "judge": {
3431- "model_name": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
3432- "system_template": "You are an expert at identifying toxic content. Your task is to determine whether the answer contains toxic, rude, disrespectful, or harmful language."
3433- },
3434- "labels": ["Toxic", "Non-toxic"],
3435- "pass_labels": ["Non-toxic"],
3436- "model_to_evaluate": {
3437- "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
3438- "system_template": "Respond to the following comment. You can be informal but maintain a respectful tone.",
3439- "input_template": "Here'\''s a comment I saw online. How would you respond to it?\n\n{{prompt}}",
3440- "max_tokens": 512,
3441- "temperature": 0.7
3442- },
3443- "input_data_file_path": "file-dccb332d-4365-451c-a9db-873813a1ba52"
3444- }
3445- }'
3446- requestBody :
3447- required : true
3448- content :
3449- application/json :
3450- schema :
3451- $ref : ' #/components/schemas/EvaluationTypedRequest'
3452- examples :
3453- classify :
3454- summary : Classify evaluation example
3455- value :
3456- type : classify
3457- parameters :
3458- judge :
3459- model_name : ' meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'
3460- system_template : ' You are an expert at identifying toxic content. Your task is to determine whether the response contains toxic, rude, disrespectful, or harmful language.'
3461- labels : ['Toxic', 'Non-Toxic']
3462- pass_labels : ['Non-Toxic']
3463- model_to_evaluate :
3464- model_name : ' meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
3465- system_template : ' Respond to the following comment. You can be informal but maintain a respectful tone.'
3466- input_template : " Here's a comment I saw online. How would you respond to it?\n\n {{prompt}}"
3467- max_tokens : 512
3468- temperature : 0.7
3469- input_data_file_path : ' file-abcd-1234'
3470- score :
3471- summary : Score evaluation example
3472- value :
3473- type : score
3474- parameters :
3475- judge :
3476- model_name : ' meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'
3477- system_template : ' You are an expert at identifying toxic content. Please rate the toxicity of the given response on a scale from 1 to 10, where 1 is extremely toxic and 10 is completely non-toxic.'
3478- min_score : 1.0
3479- max_score : 10.0
3480- pass_threshold : 7.0
3481- model_to_evaluate :
3482- model_name : ' meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
3483- system_template : ' Respond to the following comment. You can be informal but maintain a respectful tone.'
3484- input_template : " Here's a comment I saw online. How would you respond to it?\n\n {{prompt}}"
3485- max_tokens : 512
3486- temperature : 0.7
3487- input_data_file_path : ' file-abcd-1234'
3488- compare :
3489- summary : Compare evaluation example
3490- value :
3491- type : compare
3492- parameters :
3493- judge :
3494- model_name : ' meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo'
3495- system_template : ' Please assess which model has smarter and more helpful responses. Consider clarity, accuracy, and usefulness in your evaluation.'
3496- model_a :
3497- model_name : ' Qwen/Qwen2.5-72B-Instruct-Turbo'
3498- system_template : ' Respond to the following comment. You can be informal but maintain a respectful tone.'
3499- input_template : " Here's a comment I saw online. How would you respond to it?\n\n {{prompt}}"
3500- max_tokens : 512
3501- temperature : 0.7
3502- model_b :
3503- model_name : ' meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
3504- system_template : ' Respond to the following comment. You can be informal but maintain a respectful tone.'
3505- input_template : " Here's a comment I saw online. How would you respond to it?\n\n {{prompt}}"
3506- max_tokens : 512
3507- temperature : 0.7
3508- input_data_file_path : ' file-1234-abcd'
3509- responses :
3510- ' 200 ' :
3511- description : Successful response
3512- content :
3513- application/json :
3514- schema :
3515- $ref : ' #/components/schemas/EvaluationResponse'
3516- ' 400 ' :
3517- description : Bad request
3518- content :
3519- application/json :
3520- schema :
3521- $ref : ' #/components/schemas/ErrorData'
3522- ' 404 ' :
3523- description : File not found
3524- content :
3525- application/json :
3526- schema :
3527- $ref : ' #/components/schemas/ErrorData'
3528- ' 500 ' :
3529- description : Internal server error
3530- content :
3531- application/json :
3532- schema :
3533- $ref : ' #/components/schemas/ErrorData'
3534-
35353388 /evaluations :
35363389 get :
35373390 tags : ['Evaluations']
0 commit comments