Skip to content

Commit 5c375f0

Browse files
author
gushiqiao
committed
update server for return tensor
1 parent 11678e1 commit 5c375f0

14 files changed

Lines changed: 422 additions & 19 deletions

File tree

configs/ltx2/ltx2_3.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"audio_mel_bins":16,
1515
"double_precision_rope": true,
1616
"use_tiling_vae": false,
17-
"dit_original_ckpt": "Lightricks/LTX-2.3ltx-2.3-22b-dev.safetensors",
17+
"dit_original_ckpt": "/data/nvme4/models/ltx-2.3/ltx-2.3-22b-dev.safetensors",
1818
"caption_proj_before_connector": true,
1919
"cross_attention_adaln": true,
2020
"apply_gated_attention": true,

configs/ltx2/ltx2_3_distill_upsample_offload.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@
1515
"audio_fps": 24000,
1616
"audio_mel_bins":16,
1717
"double_precision_rope": true,
18-
"dit_original_ckpt": "Lightricks/LTX-2.3ltx-2.3-22b-distilled.safetensors",
18+
"dit_original_ckpt": "/data/nvme4/models/ltx-2.3/ltx-2.3-22b-distilled.safetensors",
1919
"skip_fp8_block_index" : [0, 43, 44, 45, 46, 47],
2020
"distilled_sigma_values": [1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0],
2121
"caption_proj_before_connector": true,
2222
"cross_attention_adaln": true,
2323
"apply_gated_attention": true,
2424
"use_upsampler": true,
25-
"upsampler_original_ckpt": "Lightricks/LTX-2.3ltx-2.3-spatial-upscaler-x2-1.1.safetensors",
25+
"upsampler_original_ckpt": "/data/nvme4/models/ltx-2.3/ltx-2.3-spatial-upscaler-x2-1.1.safetensors",
2626
"distilled_sigma_values_upsample": [0.909375, 0.725, 0.421875, 0.0]
2727
}

lightx2v/models/networks/flux2_klein/infer/pre_infer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import torch
22
import torch.nn.functional as F
3-
from diffusers.models.transformers.transformer_flux2 import Flux2PosEmbed
3+
4+
try:
5+
from diffusers.models.transformers.transformer_flux2 import Flux2PosEmbed
6+
except ImportError:
7+
Flux2PosEmbed = None
48

59
from .module_io import Flux2KleinPreInferModuleOutput
610

lightx2v/models/networks/wan/model.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,90 @@ def infer(self, inputs):
197197
elif self.offload_granularity != "model":
198198
self.pre_weight.to_cpu()
199199
self.transformer_weights.non_block_weights_to_cpu()
200+
201+
@torch.no_grad()
202+
def infer_tensor_once(self, latents, timestep, context, context_null=None):
203+
"""
204+
Run one WAN forward pass from explicit tensors.
205+
206+
Args:
207+
latents: noisy latents, shape [C,F,H,W] or [1,F,C,H,W].
208+
timestep: timestep tensor (scalar / [1] / [1,F]); first value is used.
209+
context: conditional text embeddings, shape [L,D] or [1,L,D].
210+
context_null: optional unconditional text embeddings, same shape as context.
211+
Returns:
212+
noise prediction tensor with shape [C,F,H,W].
213+
"""
214+
if self.cpu_offload:
215+
if self.offload_granularity == "model" and "wan2.2_moe" not in self.config["model_cls"]:
216+
self.to_cuda()
217+
elif self.offload_granularity != "model":
218+
self.pre_weight.to_cuda()
219+
self.transformer_weights.non_block_weights_to_cuda()
220+
221+
if latents.ndim == 5:
222+
# [B,F,C,H,W] -> [C,F,H,W], only batch size 1 is supported.
223+
if latents.shape[0] != 1:
224+
raise ValueError(f"Expected batch size 1 for 5D latents, got shape {tuple(latents.shape)}")
225+
latents = latents.squeeze(0).permute(1, 0, 2, 3).contiguous()
226+
elif latents.ndim != 4:
227+
raise ValueError(f"Expected latents ndim in [4,5], got {latents.ndim}")
228+
229+
if context.ndim == 2:
230+
context = context.unsqueeze(0)
231+
if context.ndim != 3:
232+
raise ValueError(f"Expected context ndim in [2,3], got {context.ndim}")
233+
234+
if context_null is None:
235+
context_null = context
236+
elif context_null.ndim == 2:
237+
context_null = context_null.unsqueeze(0)
238+
239+
timestep = timestep.flatten()
240+
if timestep.numel() == 0:
241+
raise ValueError("Empty timestep tensor")
242+
timestep = timestep[:1].to(torch.int64).contiguous()
243+
244+
self.scheduler.prepare(seed=0, latent_shape=[1, 1, 1, 1], image_encoder_output={})
245+
self.scheduler.latents = latents.to(AI_DEVICE)
246+
self.scheduler.timestep_input = timestep.to(AI_DEVICE)
247+
248+
inputs = {
249+
"text_encoder_output": {
250+
"context": context.to(AI_DEVICE),
251+
"context_null": context_null.to(AI_DEVICE),
252+
},
253+
"image_encoder_output": {},
254+
}
255+
256+
def _convert_flow_pred_to_x0(flow_pred, xt, timestep_tensor):
257+
original_dtype = flow_pred.dtype
258+
flow_pred, xt, sigmas, timesteps = map(
259+
lambda x: x.double().to(flow_pred.device),
260+
[flow_pred, xt, self.scheduler.sigmas, self.scheduler.timesteps],
261+
)
262+
timestep_id = torch.argmin((timesteps.unsqueeze(0) - timestep_tensor.unsqueeze(1)).abs(), dim=1)
263+
sigma_t = sigmas[timestep_id].reshape(-1, 1, 1, 1)
264+
x0_pred = xt - sigma_t * flow_pred
265+
return x0_pred.to(original_dtype)
266+
267+
timestep_for_x0 = timestep.flatten()[:1]
268+
if self.config.get("enable_cfg", False):
269+
noise_pred_cond = self._infer_cond_uncond(inputs, infer_condition=True)
270+
noise_pred_uncond = self._infer_cond_uncond(inputs, infer_condition=False)
271+
pred_x0_cond = _convert_flow_pred_to_x0(noise_pred_cond, self.scheduler.latents, timestep_for_x0)
272+
pred_x0_uncond = _convert_flow_pred_to_x0(noise_pred_uncond, self.scheduler.latents, timestep_for_x0)
273+
noise_pred = noise_pred_uncond + self.scheduler.sample_guide_scale * (noise_pred_cond - noise_pred_uncond)
274+
pred_x0 = pred_x0_uncond + self.scheduler.sample_guide_scale * (pred_x0_cond - pred_x0_uncond)
275+
else:
276+
noise_pred = self._infer_cond_uncond(inputs, infer_condition=True)
277+
pred_x0 = _convert_flow_pred_to_x0(noise_pred, self.scheduler.latents, timestep_for_x0)
278+
279+
if self.cpu_offload:
280+
if self.offload_granularity == "model" and "wan2.2_moe" not in self.config["model_cls"]:
281+
self.to_cpu()
282+
elif self.offload_granularity != "model":
283+
self.pre_weight.to_cpu()
284+
self.transformer_weights.non_block_weights_to_cpu()
285+
286+
return noise_pred, pred_x0

lightx2v/models/schedulers/flux2_klein/scheduler.py

100644100755
Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,19 @@
44

55
import numpy as np
66
import torch
7-
from diffusers.pipelines.flux2.pipeline_flux2 import compute_empirical_mu
8-
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import retrieve_timesteps
9-
from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
107

8+
# Optional diffusers imports: each guarded separately so the module stays
# importable when diffusers (or this diffusers version) is unavailable.
try:
    from diffusers.pipelines.flux2.pipeline_flux2 import compute_empirical_mu
except ImportError:
    compute_empirical_mu = None
try:
    from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import retrieve_timesteps
except ImportError:
    retrieve_timesteps = None
try:
    # FIX: FlowMatchEulerDiscreteScheduler lives in diffusers.schedulers, not in
    # the flux2 pipeline module; the previous path raised ImportError on a
    # healthy install and silently left the name as None.
    from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
except ImportError:
    FlowMatchEulerDiscreteScheduler = None
1120
from lightx2v.models.schedulers.scheduler import BaseScheduler
1221
from lightx2v.utils.envs import GET_DTYPE
1322
from lightx2v_platform.base.global_var import AI_DEVICE

lightx2v/models/video_encoders/hf/flux2_klein/vae.py

100644100755
Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
11
import os
22

33
import torch
4-
from diffusers.models import AutoencoderKLFlux2
5-
from diffusers.pipelines.flux2.image_processor import Flux2ImageProcessor
4+
5+
try:
6+
from diffusers.models import AutoencoderKLFlux2
7+
except ImportError:
8+
AutoencoderKLFlux2 = None
9+
try:
10+
from diffusers.pipelines.flux2.image_processor import Flux2ImageProcessor
11+
except ImportError:
12+
Flux2ImageProcessor = None
613

714
from lightx2v.utils.envs import GET_DTYPE
815
from lightx2v_platform.base.global_var import AI_DEVICE

lightx2v/server/api/tasks/video.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
66
from loguru import logger
77

8-
from ...schema import TaskResponse, VideoTaskRequest
8+
from ...schema import TaskResponse, VideoTaskRequest, WanTensorInferRequest, WanTensorInferResponse
99
from ...task_manager import task_manager
1010
from ..deps import get_services, validate_url_async
1111

@@ -107,3 +107,32 @@ async def save_file_async(file: UploadFile, target_dir: Path) -> str:
107107
except Exception as e:
108108
logger.error(f"Failed to create video form task: {e}")
109109
raise HTTPException(status_code=500, detail=str(e))
110+
111+
112+
@router.post("/tensor_infer", response_model=WanTensorInferResponse)
async def tensor_infer_wan(message: WanTensorInferRequest):
    """Forward a single-step tensor-inference request to the inference service."""
    services = get_services()
    assert services.inference_service is not None, "Inference service is not initialized"

    try:
        # Relay the request fields verbatim to the worker payload.
        field_names = (
            "task_id",
            "noisy_tensor",
            "context_tensor",
            "timestep_tensor",
            "context_null_tensor",
            "return_pred_x0",
        )
        payload = {name: getattr(message, name) for name in field_names}

        result = await services.inference_service.submit_tensor_infer_async(payload)
        if result is None:
            raise HTTPException(status_code=500, detail="Tensor infer request failed")
        if result.get("status") != "success":
            raise HTTPException(status_code=500, detail=result.get("error", "Tensor infer failed"))
        return WanTensorInferResponse(**result)
    except HTTPException:
        # Re-raise framework errors untouched so status codes survive.
        raise
    except Exception as e:
        logger.error(f"Failed to process tensor infer request: {e}")
        raise HTTPException(status_code=500, detail=str(e))

lightx2v/server/schema.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,23 @@ class TaskResponse(BaseModel):
7272
save_result_path: str
7373

7474

75+
class WanTensorInferRequest(BaseTaskRequest):
    """Request payload for the single-step WAN tensor-inference endpoint.

    Tensors are serialized with ``torch.save`` and base64-encoded by the
    client; the worker decodes them with ``torch.load``.
    """

    noisy_tensor: str = Field(..., description="Base64-encoded torch tensor, shape [1,F,C,H,W] or [C,F,H,W]")
    context_tensor: str = Field(..., description="Base64-encoded torch tensor, shape [1,L,D] or [L,D]")
    timestep_tensor: str = Field(..., description="Base64-encoded torch tensor, scalar or [1] / [1,F]")
    # Empty string means "no unconditional context"; the worker treats any
    # falsy value as absent.
    context_null_tensor: str = Field("", description="Optional base64 tensor for unconditional context")
    return_pred_x0: bool = Field(False, description="Whether to also return pred_x0")


class WanTensorInferResponse(BaseModel):
    """Response payload mirroring the worker's tensor-inference result dict."""

    task_id: str
    status: str
    noise_pred_tensor: str = Field("", description="Base64-encoded torch tensor")
    pred_x0_tensor: str = Field("", description="Base64-encoded torch tensor")
    message: str = Field("", description="Execution message")
    error: str = Field("", description="Error message when status=failed")
90+
91+
7592
class StopTaskResponse(BaseModel):
7693
stop_status: str
7794
reason: str

lightx2v/server/services/inference/service.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,32 @@ async def submit_task_async(self, task_data: dict) -> Optional[dict]:
7272
"message": f"Task processing failed: {str(e)}",
7373
}
7474

75+
async def submit_tensor_infer_async(self, task_data: dict) -> Optional[dict]:
    """Forward a tensor-inference payload to the rank-0 worker.

    Returns the worker's result dict, a failure dict on exception, or
    ``None`` when the service is not running or this process is not rank 0.
    """
    if not self.is_running or not self.worker:
        logger.error("Inference service is not started")
        return None

    if self.worker.rank != 0:
        # Only rank 0 serves HTTP tensor-infer requests.
        return None

    try:
        if self.worker.processing:
            # NOTE(review): this only logs — it does not actually wait, so a
            # tensor-infer request can overlap a task already in flight.
            logger.info("Waiting for previous task to complete before tensor infer request")

        self.worker.processing = True
        try:
            return await self.worker.process_tensor_request(task_data)
        finally:
            # FIX: clear the busy flag even on BaseException (e.g. asyncio
            # CancelledError); the previous version left it stuck at True.
            self.worker.processing = False
    except Exception as e:
        logger.error(f"Failed to process tensor infer request: {str(e)}")
        return {
            "task_id": task_data.get("task_id", "unknown"),
            "status": "failed",
            "error": str(e),
            "message": f"Tensor infer processing failed: {str(e)}",
        }
100+
75101
def server_metadata(self):
76102
assert hasattr(self, "args"), "Distributed inference service has not been started. Call start_distributed_inference() first."
77103
return {"nproc_per_node": self.worker.world_size, "model_cls": self.args.model_cls, "model_path": self.args.model_path}

lightx2v/server/services/inference/worker.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import asyncio
2+
import base64
23
import os
4+
from io import BytesIO
35
from pathlib import Path
46
from typing import Any, Dict
57

@@ -123,6 +125,95 @@ async def process_request(self, task_data: Dict[str, Any]) -> Dict[str, Any]:
123125
else:
124126
return None
125127

128+
@staticmethod
def _decode_tensor_base64(tensor_b64: str, device: str | torch.device) -> torch.Tensor:
    """Decode a base64 string produced by ``torch.save`` into a tensor on *device*.

    SECURITY FIX: ``torch.load`` deserializes pickle data, so a crafted
    payload from a network client could execute arbitrary code.
    ``weights_only=True`` restricts deserialization to tensors and
    primitive containers.
    """
    tensor_bytes = base64.b64decode(tensor_b64)
    buffer = BytesIO(tensor_bytes)
    return torch.load(buffer, map_location=device, weights_only=True)
133+
134+
@staticmethod
def _encode_tensor_base64(tensor: torch.Tensor) -> str:
    """Serialize *tensor* (detached, moved to CPU) with ``torch.save`` and base64-encode it."""
    with BytesIO() as sink:
        torch.save(tensor.detach().cpu(), sink)
        raw = sink.getvalue()
    return base64.b64encode(raw).decode("utf-8")
139+
140+
@staticmethod
def _lookup_sigma_from_scheduler(scheduler, timestep_tensor: torch.Tensor, target_device: torch.device, target_dtype: torch.dtype) -> torch.Tensor:
    """Map each timestep to its scheduler sigma via nearest-timestep lookup.

    Mirrors the Self-Forcing wan_wrapper logic: for every queried timestep,
    find the closest entry in ``scheduler.timesteps`` and return the
    corresponding ``scheduler.sigmas`` value, cast to *target_dtype*.
    """
    # Work in float64 on the target device for a stable nearest-match.
    ts = scheduler.timesteps.to(target_device, dtype=torch.float64)
    sg = scheduler.sigmas.to(target_device, dtype=torch.float64)
    query = timestep_tensor.flatten().to(target_device, dtype=torch.float64)
    distances = (ts.unsqueeze(0) - query.unsqueeze(1)).abs()
    nearest = torch.argmin(distances, dim=1)
    return sg[nearest].to(target_dtype)
149+
150+
def _ensure_tensor_infer_scheduler_ready(self) -> None:
    """Prepare the runner's scheduler once so sigmas/timesteps are populated."""
    scheduler = self.runner.model.scheduler
    has_timesteps = getattr(scheduler, "timesteps", None) is not None
    has_sigmas = getattr(scheduler, "sigmas", None) is not None
    if has_timesteps and has_sigmas:
        return
    # Only the scheduler metadata is needed here, so a tiny latent shape suffices.
    scheduler.prepare(seed=0, latent_shape=[16, 1, 2, 2], image_encoder_output={})
160+
161+
async def process_tensor_request(self, task_data: Dict[str, Any]) -> Dict[str, Any]:
    """Decode tensors from *task_data*, run one WAN forward pass, and return encoded results.

    Always returns a result dict with ``status`` of ``"success"`` or
    ``"failed"``; exceptions are captured into the failure dict.
    """
    task_id = task_data.get("task_id", "unknown")

    if self.world_size > 1:
        reason = "tensor infer endpoint currently supports WORLD_SIZE=1 only"
        return {"task_id": task_id, "status": "failed", "error": reason, "message": reason}

    try:
        if not hasattr(self.runner, "model"):
            raise RuntimeError("Runner model is not initialized")
        if not hasattr(self.runner.model, "infer_tensor_once"):
            raise RuntimeError(f"Current model class does not support tensor infer: {type(self.runner.model).__name__}")

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self._ensure_tensor_infer_scheduler_ready()

        decode = self._decode_tensor_base64
        noisy = decode(task_data["noisy_tensor"], device=device)
        context = decode(task_data["context_tensor"], device=device)
        timestep = decode(task_data["timestep_tensor"], device=device)
        # A falsy (empty/missing) field means no unconditional context.
        context_null = None
        if task_data.get("context_null_tensor"):
            context_null = decode(task_data["context_null_tensor"], device=device)

        # NOTE(review): infer_tensor_once is synchronous GPU work and will
        # block the event loop for its duration.
        noise_pred, pred_x0 = self.runner.model.infer_tensor_once(
            latents=noisy,
            timestep=timestep,
            context=context,
            context_null=context_null,
        )
        if not bool(task_data.get("return_pred_x0", False)):
            pred_x0 = None

        return {
            "task_id": task_id,
            "status": "success",
            "noise_pred_tensor": self._encode_tensor_base64(noise_pred),
            "pred_x0_tensor": self._encode_tensor_base64(pred_x0) if pred_x0 is not None else "",
            "message": "Tensor infer completed",
            "error": "",
        }
    except Exception as e:
        logger.exception(f"Rank {self.rank} tensor inference failed: {e}")
        return {
            "task_id": task_id,
            "status": "failed",
            "noise_pred_tensor": "",
            "pred_x0_tensor": "",
            "message": f"Tensor infer failed: {e}",
            "error": str(e),
        }
216+
126217
def switch_lora(self, lora_name: str, lora_strength: float):
127218
try:
128219
if lora_name is None:

0 commit comments

Comments
 (0)