@@ -41,6 +41,39 @@ def test_check_jsonl_valid_instruction(tmp_path: Path):
4141 assert report ["has_min_samples" ]
4242
4343
def test_check_jsonl_valid_instruction_multimodal(tmp_path: Path):
    """Check that an instruction-format sample with a multimodal prompt passes.

    The ``prompt`` is a list of content parts (one ``text`` part followed by
    two ``image_url`` parts) instead of a plain string; the ``completion`` is
    a plain string.
    """
    # Create a valid JSONL file with instruction format and a multimodal prompt
    file = tmp_path / "valid_instruction_multimodal.jsonl"
    content = [
        {
            "prompt": [
                {
                    "type": "text",
                    "text": "What's the difference between these two images?",
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "data:image/jpeg;base64,..."},
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "data:image/jpeg;base64,..."},
                },
            ],
            "completion": "The first image is a cat, the second image is a dog.",
        },
    ]

    # One JSON object per line, separated by a bare newline. The encoding is
    # pinned so the "utf8" assertion below does not depend on the platform's
    # default text encoding.
    with file.open("w", encoding="utf-8") as f:
        f.write("\n".join(json.dumps(item) for item in content))

    report = check_file(file)

    assert report["is_check_passed"]
    assert report["utf8"]
    assert report["num_samples"] == len(content)
    assert report["has_min_samples"]
75+
76+
4477def test_check_jsonl_valid_conversational_single_turn (tmp_path : Path ):
4578 # Create a valid JSONL file with conversational format and 1 user-assistant turn pair
4679 file = tmp_path / "valid_conversational_single_turn.jsonl"
@@ -120,6 +153,48 @@ def test_check_jsonl_valid_conversational_multiple_turns(tmp_path: Path):
120153 assert report ["has_min_samples" ]
121154
122155
def test_check_jsonl_valid_conversational_multimodal_single_turn(tmp_path: Path):
    """Check that a conversational sample with multimodal message content passes.

    The sample holds one user/assistant turn pair. Both messages carry their
    ``content`` as a list of parts: the user message has one ``text`` part and
    two ``image_url`` parts, the assistant message a single ``text`` part.
    """
    # Create a valid JSONL file with conversational format, multimodal content
    # and 1 user-assistant turn pair
    file = tmp_path / "valid_conversational_multimodal_single_turn.jsonl"
    content = [
        {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "What's the difference between these two images?",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": "data:image/jpeg;base64,..."},
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": "data:image/jpeg;base64,..."},
                        },
                    ],
                },
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": "Hi there!"}],
                },
            ]
        },
    ]

    # One JSON object per line, separated by a bare newline. The encoding is
    # pinned so the "utf8" assertion below does not depend on the platform's
    # default text encoding.
    with file.open("w", encoding="utf-8") as f:
        f.write("\n".join(json.dumps(item) for item in content))

    report = check_file(file)

    assert report["is_check_passed"]
    assert report["utf8"]
    assert report["num_samples"] == len(content)
    assert report["has_min_samples"]
196+
197+
123198def test_check_jsonl_empty_file (tmp_path : Path ):
124199 # Create an empty JSONL file
125200 file = tmp_path / "empty.jsonl"
0 commit comments