Skip to content

Commit 96e85f9

Browse files
wangshankun and Antigravity
authored and committed
[feat] sekotalk v2.5 通过命令行控制每段生成长度
1 parent 77f0469 commit 96e85f9

3 files changed

Lines changed: 12 additions & 2 deletions

File tree

lightx2v/infer.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,7 @@ def main():
165165
parser.add_argument("--save_result_path", type=str, default=None, help="The path to save video path/file")
166166
parser.add_argument("--return_result_tensor", action="store_true", help="Whether to return result tensor. (Useful for comfyui)")
167167
parser.add_argument("--target_shape", type=int, nargs="+", default=[], help="Set return video or image shape")
168+
parser.add_argument("--target_video_length", type=int, default=81, help="The target video length for each generated clip")
168169
parser.add_argument("--aspect_ratio", type=str, default="")
169170
parser.add_argument("--video_path", type=str, default=None, help="input video path(for sr/v2v task)")
170171
parser.add_argument("--sr_ratio", type=float, default=2.0, help="super resolution ratio for sr task")

lightx2v/models/runners/wan/wan_audio_runner.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
from lightx2v.models.video_encoders.hf.wan.vae_2_2 import Wan2_2_VAE
2424
from lightx2v.server.metrics import monitor_cli
2525
from lightx2v.utils.envs import *
26+
from lightx2v.utils.input_info import UNSET
2627
from lightx2v.utils.profiler import *
2728
from lightx2v.utils.registry_factory import RUNNER_REGISTER
2829
from lightx2v.utils.utils import find_torch_model_path, fixed_shape_resize, get_optimal_patched_size_with_sp, isotropic_crop_resize, load_weights, wan_vae_to_comfy
@@ -315,8 +316,14 @@ def read_audio_input(self, audio_path):
315316
if expected_frames < int(self.video_duration * target_fps):
316317
logger.warning(f"Input video duration is greater than actual audio duration, using audio duration instead: audio_duration={audio_len / target_fps}, video_duration={self.video_duration}")
317318

318-
# Segment audio
319-
audio_segments = self._audio_processor.segment_audio(audio_array, expected_frames, self.config.get("target_video_length", 81), self.prev_frame_length)
319+
# Segment audio (CLI / input_info wins over config_json; target_video_length is not merged into config)
320+
target_video_length = self.config.get("target_video_length", 81)
321+
ii = getattr(self, "input_info", None)
322+
if ii is not None and hasattr(ii, "target_video_length"):
323+
tvl = ii.target_video_length
324+
if tvl is not None and tvl is not UNSET and tvl > 0:
325+
target_video_length = tvl
326+
audio_segments = self._audio_processor.segment_audio(audio_array, expected_frames, target_video_length, self.prev_frame_length)
320327

321328
# Mask latent for multi-person s2v
322329
if mask_files is not None:

lightx2v/utils/input_info.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ class S2VInputInfo:
121121
resized_shape: list = field(default_factory=list)
122122
latent_shape: list = field(default_factory=list)
123123
target_shape: list = field(default_factory=list)
124+
target_video_length: int = field(default_factory=int)
124125

125126
# prev info
126127
overlap_frame: torch.Tensor = field(default_factory=lambda: None)
@@ -148,6 +149,7 @@ class RS2VInputInfo:
148149
resized_shape: list = field(default_factory=list)
149150
latent_shape: list = field(default_factory=list)
150151
target_shape: list = field(default_factory=list)
152+
target_video_length: int = field(default_factory=int)
151153

152154
# prev info
153155
overlap_frame: torch.Tensor = field(default_factory=lambda: None)

0 commit comments

Comments
 (0)