|
23 | 23 | from lightx2v.models.video_encoders.hf.wan.vae_2_2 import Wan2_2_VAE |
24 | 24 | from lightx2v.server.metrics import monitor_cli |
25 | 25 | from lightx2v.utils.envs import * |
| 26 | +from lightx2v.utils.input_info import UNSET |
26 | 27 | from lightx2v.utils.profiler import * |
27 | 28 | from lightx2v.utils.registry_factory import RUNNER_REGISTER |
28 | 29 | from lightx2v.utils.utils import find_torch_model_path, fixed_shape_resize, get_optimal_patched_size_with_sp, isotropic_crop_resize, load_weights, wan_vae_to_comfy |
@@ -315,8 +316,14 @@ def read_audio_input(self, audio_path): |
315 | 316 | if expected_frames < int(self.video_duration * target_fps): |
316 | 317 | logger.warning(f"Input video duration is greater than actual audio duration, using audio duration instead: audio_duration={audio_len / target_fps}, video_duration={self.video_duration}") |
317 | 318 |
|
318 | | - # Segment audio |
319 | | - audio_segments = self._audio_processor.segment_audio(audio_array, expected_frames, self.config.get("target_video_length", 81), self.prev_frame_length) |
| 319 | + # Segment audio. A positive target_video_length set on input_info (e.g. from the CLI) takes precedence over the config value (default 81), because it is not merged into config. |
| 320 | + target_video_length = self.config.get("target_video_length", 81) |
| 321 | + ii = getattr(self, "input_info", None) |
| 322 | + if ii is not None and hasattr(ii, "target_video_length"): |
| 323 | + tvl = ii.target_video_length |
| 324 | + if tvl is not None and tvl is not UNSET and tvl > 0: |
| 325 | + target_video_length = tvl |
| 326 | + audio_segments = self._audio_processor.segment_audio(audio_array, expected_frames, target_video_length, self.prev_frame_length) |
320 | 327 |
|
321 | 328 | # Mask latent for multi-person s2v |
322 | 329 | if mask_files is not None: |
|
0 commit comments