Skip to content

音频推理报错 #113

@GoGo-UpUp

Description

@GoGo-UpUp

Describe the bug
安装好vllm后,通过vllm serve命令部署好MiniCPM-o-4_5模型,音频推理报如下错误

(APIServer pid=111789) Traceback (most recent call last):
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 416, in run_asgi
(APIServer pid=111789)     result = await app(  # type: ignore[func-returns-value]
(APIServer pid=111789)              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
(APIServer pid=111789)     return await self.app(scope, receive, send)
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1160, in __call__
(APIServer pid=111789)     await super().__call__(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 107, in __call__
(APIServer pid=111789)     await self.middleware_stack(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 186, in __call__
(APIServer pid=111789)     raise exc
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 164, in __call__
(APIServer pid=111789)     await self.app(scope, receive, _send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/cors.py", line 87, in __call__
(APIServer pid=111789)     await self.app(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/prometheus_fastapi_instrumentator/middleware.py", line 177, in __call__
(APIServer pid=111789)     raise exc
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/prometheus_fastapi_instrumentator/middleware.py", line 175, in __call__
(APIServer pid=111789)     await self.app(scope, receive, send_wrapper)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/exceptions.py", line 63, in __call__
(APIServer pid=111789)     await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
(APIServer pid=111789)     raise exc
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 42, in wrapped_app
(APIServer pid=111789)     await app(scope, receive, sender)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
(APIServer pid=111789)     await self.app(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 716, in __call__
(APIServer pid=111789)     await self.middleware_stack(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 736, in app
(APIServer pid=111789)     await route.handle(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 290, in handle
(APIServer pid=111789)     await self.app(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 130, in app
(APIServer pid=111789)     await wrap_app_handling_exceptions(app, request)(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
(APIServer pid=111789)     raise exc
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 42, in wrapped_app
(APIServer pid=111789)     await app(scope, receive, sender)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 116, in app
(APIServer pid=111789)     response = await f(request)
(APIServer pid=111789)                ^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 670, in app
(APIServer pid=111789)     raw_response = await run_endpoint_function(
(APIServer pid=111789)                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 324, in run_endpoint_function
(APIServer pid=111789)     return await dependant.call(**values)
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/utils.py", line 95, in wrapper
(APIServer pid=111789)     return handler_task.result()
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/utils.py", line 116, in wrapper
(APIServer pid=111789)     return await func(*args, **kwargs)
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/openai/chat_completion/api_router.py", line 55, in create_chat_completion
(APIServer pid=111789)     generator = await handler.create_chat_completion(request, raw_request)
(APIServer pid=111789)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/openai/chat_completion/serving.py", line 305, in create_chat_completion
(APIServer pid=111789)     result = await self.render_chat_request(request)
(APIServer pid=111789)              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/openai/chat_completion/serving.py", line 261, in render_chat_request
(APIServer pid=111789)     conversation, engine_prompts = await self._preprocess_chat(
(APIServer pid=111789)                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/openai/engine/serving.py", line 898, in _preprocess_chat
(APIServer pid=111789)     (conversation,), (engine_prompt,) = await renderer.render_chat_async(
(APIServer pid=111789)                                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 823, in render_chat_async
(APIServer pid=111789)     self.process_for_engine(prompt, arrival_time) for prompt in tok_prompts
(APIServer pid=111789)     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 715, in process_for_engine
(APIServer pid=111789)     engine_prompt = self._process_singleton(prompt)
(APIServer pid=111789)                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 691, in _process_singleton
(APIServer pid=111789)     return self._process_tokens(prompt)  # type: ignore[arg-type]
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 636, in _process_tokens
(APIServer pid=111789)     inputs = self._process_multimodal(
(APIServer pid=111789)              ^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 622, in _process_multimodal
(APIServer pid=111789)     mm_inputs = mm_processor.apply(mm_processor_inputs, mm_timing_ctx)
(APIServer pid=111789)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1663, in apply
(APIServer pid=111789)     ) = self._cached_apply_hf_processor(inputs, timing_ctx)
(APIServer pid=111789)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1452, in _cached_apply_hf_processor
(APIServer pid=111789)     ) = self._apply_hf_processor_main(
(APIServer pid=111789)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1269, in _apply_hf_processor_main
(APIServer pid=111789)     mm_processed_data = self._apply_hf_processor_mm_only(
(APIServer pid=111789)                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1227, in _apply_hf_processor_mm_only
(APIServer pid=111789)     _, mm_processed_data, _ = self._apply_hf_processor_text_mm(
(APIServer pid=111789)                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1154, in _apply_hf_processor_text_mm
(APIServer pid=111789)     processed_data = self._call_hf_processor(
(APIServer pid=111789)                      ^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/model_executor/models/minicpmv.py", line 880, in _call_hf_processor
(APIServer pid=111789)     mm_inputs = self.process_mm_inputs(mm_data, mm_kwargs, tok_kwargs)
(APIServer pid=111789)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/model_executor/models/minicpmo.py", line 427, in process_mm_inputs
(APIServer pid=111789)     **self.process_audios(mm_data, mm_kwargs, tok_kwargs),
(APIServer pid=111789)       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/model_executor/models/minicpmo.py", line 402, in process_audios
(APIServer pid=111789)     feat[:, :feature_len]
(APIServer pid=111789)     ~~~~^^^^^^^^^^^^^^^^^
(APIServer pid=111789) TypeError: only integer tensors of a single element can be converted to an index

问题定位
我按照报错,定位到如下代码
vllm/model_executor/models/minicpmo.py文件中MiniCPMOMultiModalProcessor类的process_audios
报错代码如下

unpadded_audio_features = [
    feat[:, :feature_len]
    for feat, feature_len in zip(
        audio_inputs["audio_features"],
        audio_inputs["audio_feature_lens"],
    )
]
audio_inputs["audio_features"] = unpadded_audio_features

打印audio_inputs["audio_feature_lens"]结果如下:

[tensor([3000, 3000, 3000, 3000, 1187])]

我按照如下方式将audio_inputs["audio_feature_lens"]转换成list:

audio_inputs["audio_feature_lens"] = audio_inputs["audio_feature_lens"][0].cpu().tolist()

重新运行后报新的错误

(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100] EngineCore encountered a fatal error.
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100] Traceback (most recent call last):
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/engine/core.py", line 1091, in run_engine_core
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     engine_core.run_busy_loop()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/engine/core.py", line 1126, in run_busy_loop
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     self._process_engine_step()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/engine/core.py", line 1163, in _process_engine_step
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     outputs, model_executed = self.step_fn()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                               ^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/engine/core.py", line 447, in step_with_batch_queue
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     exec_future = self.model_executor.execute_model(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/executor/uniproc_executor.py", line 112, in execute_model
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     output.result()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/usr/lib/python3.12/concurrent/futures/_base.py", line 449, in result
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return self.__get_result()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     raise self._exception
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/executor/uniproc_executor.py", line 82, in collective_rpc
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     result = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/serial_utils.py", line 459, in run_method
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return func(*args, **kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/worker_base.py", line 332, in execute_model
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return self.worker.execute_model(scheduler_output)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 124, in decorate_context
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return func(*args, **kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/gpu_worker.py", line 816, in execute_model
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     output = self.model_runner.execute_model(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 124, in decorate_context
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return func(*args, **kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/gpu_model_runner.py", line 3740, in execute_model
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     ) = self._preprocess(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]         ^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/gpu_model_runner.py", line 2992, in _preprocess
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     self._execute_mm_encoder(scheduler_output)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/gpu_model_runner.py", line 2645, in _execute_mm_encoder
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     batch_outputs = model.embed_multimodal(**mm_kwargs_batch)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/model_executor/models/minicpmv.py", line 1149, in embed_multimodal
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     modalities = self._parse_and_validate_multimodal_inputs(**kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/model_executor/models/minicpmo.py", line 838, in _parse_and_validate_multimodal_inputs
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     modalities["audios"] = self._parse_and_validate_audio_input(**kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/model_executor/models/minicpmo.py", line 820, in _parse_and_validate_audio_input
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return MiniCPMOAudioFeatureInputs(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/utils/tensor_schema.py", line 70, in __init__
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     self.validate()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/utils/tensor_schema.py", line 249, in validate
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     self._validate_tensor_shape_expected(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/utils/tensor_schema.py", line 173, in _validate_tensor_shape_expected
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     raise ValueError(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100] ValueError: audio_feature_lens has rank 1 but expected 2. Expected shape: ('bn', 's'), but got (1,)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions