|
2 | 2 | # |
3 | 3 | # SPDX-License-Identifier: Apache-2.0 |
4 | 4 |
|
5 | | -# TODO(@zhewang2001): Please refactor the corresponding code snippets and then upload it. |
| 5 | +import json |
| 6 | +import os |
6 | 7 |
|
| 8 | +from datasets import load_dataset |
7 | 9 |
|
8 | | -def evaluate_cweval(*args): |
9 | | - pass # placeholder |
| 10 | + |
def _first_assistant_content(messages):
    """Return the content of the first assistant message, or None if absent."""
    for message in messages:
        if message["role"] == "assistant":
            return message["content"]
    return None


def _extract_first_code_block(text):
    """Return the body of the first fenced code block in *text*, or None.

    The text is split on triple backticks; a complete fence needs an opening
    and a closing marker, i.e. at least three parts. The first line of the
    fenced part (the language tag) is dropped when the part spans several
    lines, and the result is stripped of surrounding whitespace.
    """
    parts = text.split("```")
    if len(parts) < 3:
        return None
    fenced = parts[1]
    if "\n" in fenced:
        return fenced.split("\n", 1)[1].strip()
    return fenced.strip()


def evaluate_cweval(generation_path, task, cweval_path=None):
    """Write generated code into the CWEval ``evals`` directory layout.

    For every record of the JSONL file at ``generation_path`` whose
    ``task_id`` is known to the ``test`` split of dataset ``task``, the first
    fenced code block of the first assistant message is written to
    ``<cweval dir>/evals/<model>/generated_0/<file_path>``, where
    ``file_path`` is the dataset's ``file_path`` with ``"_task"`` replaced by
    ``"_raw"`` and ``model`` is the generation file name up to ``".trimmed"``.

    Args:
        generation_path: Path to the JSONL generations file. Each record is
            expected to carry ``task_id`` and, optionally, ``messages``.
        task: Dataset identifier passed to ``load_dataset``.
        cweval_path: Directory to switch into while working. When falsy,
            ``../CWEval`` relative to the current working directory is used.

    Returns:
        None. The function's effect is the files it writes.

    Raises:
        OSError: If the CWEval directory or the generations file cannot be
            accessed.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    model = os.path.basename(generation_path).split(".trimmed")[0]

    # Resolve before changing directory so a relative path stays valid.
    generation_path = os.path.abspath(generation_path)
    current_dir = os.getcwd()

    try:
        # NOTE(review): assumes the original's chdir("CWEval") belonged to the
        # fallback branch only, i.e. cweval_path already points at the CWEval
        # checkout -- confirm against callers.
        os.chdir(cweval_path if cweval_path else os.path.join("..", "CWEval"))
        base_output_dir = os.path.join(os.getcwd(), "evals", model, "generated_0")

        task_dataset = load_dataset(task)["test"]
        task_dict = {
            item["task_id"]: item["file_path"].replace("_task", "_raw")
            for item in task_dataset
        }

        os.makedirs(base_output_dir, exist_ok=True)

        with open(generation_path, "r", encoding="utf-8") as f:
            # Skip blank lines (e.g. a trailing newline) instead of crashing.
            data = [json.loads(line) for line in f if line.strip()]

        for item in data:
            file_path = task_dict.get(item["task_id"])
            if not file_path or "messages" not in item:
                continue

            assistant_content = _first_assistant_content(item["messages"])
            if not assistant_content:
                continue

            code_block = _extract_first_code_block(assistant_content)
            if code_block is None:
                continue

            output_path = os.path.join(base_output_dir, file_path)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            with open(output_path, "w", encoding="utf-8") as out:
                out.write(code_block)
    finally:
        # Always restore the caller's working directory, even on failure.
        os.chdir(current_dir)
0 commit comments