@@ -21,72 +21,67 @@ def build_prompt(
2121
@staticmethod
@abstractmethod
def parse_response(response: str) -> list[dict]:
    """Parse the LLM response and return the generated QAs.

    Abstract hook with no default behaviour: concrete subclasses must
    override it.

    :param response: raw text returned by the LLM client
    :return: a list with one dict per generated QA item.
        NOTE(review): presumably each dict carries the "question" /
        "answer" (and optional "options") keys that
        ``format_generation_results`` reads -- confirm against subclasses.
    """
2626
2727 async def generate (
2828 self ,
2929 batch : tuple [
3030 list [tuple [str , dict ]], list [tuple [Any , Any , dict ] | tuple [Any , Any , Any ]]
3131 ],
32- ) -> dict [ str , Any ]:
32+ ) -> list [ dict ]:
3333 """
3434 Generate QAs based on a given batch.
3535 :param batch
3636 :return: QA pairs
3737 """
38- result = {}
3938 prompt = self .build_prompt (batch )
4039 response = await self .llm_client .generate_answer (prompt )
4140 qa_pairs = self .parse_response (response ) # generate one or more QA pairs
42- result .update (qa_pairs )
43- return result
41+ return qa_pairs
4442
@staticmethod
def format_generation_results(
    results: list[dict], output_data_format: str
) -> list[dict[str, Any]]:
    """Convert QA dicts into records of the requested training-data format.

    For every item the "question" and "answer" values are read (missing
    keys default to ""). When the item has a non-empty "options" mapping,
    the options are appended to the question as an "Options:" section,
    one "<key>. <value>" line per option, ordered by sorted key.

    :param results: QA dicts, one per generated item
    :param output_data_format: one of "Alpaca", "Sharegpt" or "ChatML"
    :return: one formatted record per input item, in input order
    :raises ValueError: if ``output_data_format`` is not a known format.
        The format is checked per item, so an empty ``results`` returns
        an empty list without validating it.
    """
    flat_results = []
    for qa_data in results:
        question = qa_data.get("question", "")
        answer = qa_data.get("answer", "")

        options = qa_data.get("options")
        if options:
            # Sort keys so the option order is deterministic.
            options_str = "\n".join(
                f"{key}. {options[key]}" for key in sorted(options)
            )
            question += f"\nOptions:\n{options_str}"

        if output_data_format == "Alpaca":
            flat_results.append(
                {
                    "instruction": question,
                    "input": "",
                    "output": answer,
                }
            )
        elif output_data_format == "Sharegpt":
            flat_results.append(
                {
                    "conversations": [
                        {"from": "human", "value": question},
                        {"from": "gpt", "value": answer},
                    ]
                }
            )
        elif output_data_format == "ChatML":
            flat_results.append(
                {
                    "messages": [
                        {"role": "user", "content": question},
                        {"role": "assistant", "content": answer},
                    ]
                }
            )
        else:
            raise ValueError(f"Unknown output data format: {output_data_format}")
    return flat_results
0 commit comments