diff --git a/samples/HttpSpeechSynthesizerUsage.java b/samples/HttpSpeechSynthesizerUsage.java
new file mode 100644
index 0000000..4586a0e
--- /dev/null
+++ b/samples/HttpSpeechSynthesizerUsage.java
@@ -0,0 +1,234 @@
+// Copyright (c) Alibaba, Inc. and its affiliates.
+
+import com.alibaba.dashscope.audio.http_tts.AudioInfo;
+import com.alibaba.dashscope.audio.http_tts.HttpSpeechSynthesisParam;
+import com.alibaba.dashscope.audio.http_tts.HttpSpeechSynthesisResult;
+import com.alibaba.dashscope.audio.http_tts.HttpSpeechSynthesizer;
+import com.alibaba.dashscope.common.ResultCallback;
+import com.alibaba.dashscope.exception.ApiException;
+import com.alibaba.dashscope.exception.InputRequiredException;
+import com.alibaba.dashscope.exception.NoApiKeyException;
+import com.alibaba.dashscope.utils.Constants;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * Example usage of HttpSpeechSynthesizer for HTTP SSE-based text-to-speech synthesis.
+ *
+ * <p>Make sure to set the DASHSCOPE_API_KEY environment variable before running this example.
+ *
+ * @author DashScope SDK Team
+ */
+public class HttpSpeechSynthesizerUsage {
+
+  /**
+   * Demonstrates synchronous call with SSE - blocks until synthesis is complete and returns audio
+   * data.
+   */
+  public static void syncCall() {
+    System.out.println("=== Synchronous Call with SSE Example ===");
+
+    // Client used to issue the request
+    HttpSpeechSynthesizer tts = new HttpSpeechSynthesizer();
+
+    // Request: model, text, voice and output audio settings
+    HttpSpeechSynthesisParam request =
+        HttpSpeechSynthesisParam.builder()
+            .model("cosyvoice-v3-flash")
+            .text("我家的后面有一个很大的园。")
+            .voice("longanyang")
+            .format("wav")
+            .sampleRate(24000)
+            .build();
+
+    try {
+      // Returns the complete audio payload of the synthesis
+      ByteBuffer audio = tts.callAndReturnAudio(request);
+      if (audio == null || !audio.hasRemaining()) {
+        return; // nothing to save
+      }
+
+      // Drain the buffer into an array so it can be written in one call
+      byte[] payload = new byte[audio.remaining()];
+      audio.get(payload);
+
+      try (FileOutputStream out = new FileOutputStream("sync_output.wav")) {
+        out.write(payload);
+        System.out.println("Audio saved to sync_output.wav, size: " + payload.length + " bytes");
+      } catch (IOException e) {
+        System.err.println("Failed to save audio: " + e.getMessage());
+      }
+    } catch (ApiException | NoApiKeyException | InputRequiredException e) {
+      System.err.println("Synthesis failed: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Demonstrates synchronous call without SSE - returns audio URL instead of audio data. This is a
+   * simpler and faster way to get the synthesized audio.
+   */
+  public static void syncCallWithUrl() {
+    System.out.println("\n=== Synchronous Call without SSE (returns Audio URL) ===");
+
+    HttpSpeechSynthesizer tts = new HttpSpeechSynthesizer();
+
+    HttpSpeechSynthesisParam request =
+        HttpSpeechSynthesisParam.builder()
+            .model("cosyvoice-v3-flash")
+            .text("我家的后面有一个很大的园。")
+            .voice("longanyang")
+            .format("wav")
+            .sampleRate(24000)
+            .build();
+
+    try {
+      // Plain (non-SSE) request: the result carries an audio URL instead of audio bytes
+      HttpSpeechSynthesisResult response = tts.call(request);
+
+      System.out.println("Request ID: " + response.getRequestId());
+      System.out.println("Finish Reason: " + response.getFinishReason());
+
+      if (!response.hasAudioUrl()) {
+        return;
+      }
+
+      AudioInfo info = response.getAudioInfo();
+      System.out.println("\nAudio URL: " + info.getUrl());
+      System.out.println("Audio ID: " + info.getId());
+      System.out.println("Expires At: " + info.getExpiresAt());
+      System.out.println("Remaining Time: " + info.getRemainingSeconds() + " seconds");
+      System.out.println("URL Expired: " + info.isExpired());
+
+      // The file behind the URL can be fetched with HttpURLConnection or any other HTTP client
+      System.out.println("\nTip: You can download the audio file from the URL above.");
+    } catch (ApiException | NoApiKeyException | InputRequiredException e) {
+      System.err.println("Synthesis failed: " + e.getMessage());
+    }
+  }
+
+  /** Demonstrates streaming call with callback - receives audio chunks as they arrive. */
+  public static void streamCallWithCallback() {
+    System.out.println("\n=== Streaming Call with Callback Example ===");
+
+    HttpSpeechSynthesizer synthesizer = new HttpSpeechSynthesizer();
+
+    HttpSpeechSynthesisParam param =
+        HttpSpeechSynthesisParam.builder()
+            .model("cosyvoice-v3-flash")
+            .text("今天天气真好，适合出去玩。")
+            .voice("longanyang")
+            .format("wav")
+            .sampleRate(24000)
+            .build();
+
+    // Released by onComplete/onError so this method can block until the stream ends
+    CountDownLatch latch = new CountDownLatch(1);
+    try {
+      synthesizer.streamCall(
+          param,
+          new ResultCallback<HttpSpeechSynthesisResult>() {
+            private int chunkCount = 0;
+
+            @Override
+            public void onEvent(HttpSpeechSynthesisResult result) {
+              if (result.hasAudioData()) {
+                // Count only audio-bearing events so the numbering and total are accurate
+                chunkCount++;
+                System.out.println(
+                    "Received chunk #"
+                        + chunkCount
+                        + ", size: "
+                        + result.getAudioDataSize()
+                        + " bytes");
+              }
+              if (result.getRequestId() != null) {
+                System.out.println("Request ID: " + result.getRequestId());
+              }
+            }
+
+            @Override
+            public void onComplete() {
+              System.out.println("✓ Synthesis completed! Total chunks received: " + chunkCount);
+              ByteBuffer audioData = synthesizer.getAccumulatedAudioData();
+              if (audioData != null) {
+                System.out.println("Total audio size: " + audioData.remaining() + " bytes");
+              }
+              latch.countDown();
+            }
+
+            @Override
+            public void onError(Exception e) {
+              System.err.println("✗ Error during synthesis: " + e.getMessage());
+              latch.countDown();
+            }
+          });
+
+      latch.await();
+      System.out.println("Done!");
+    } catch (InterruptedException e) {
+      // Restore the interrupt flag so code further up the stack can still react to it
+      Thread.currentThread().interrupt();
+      System.err.println("Failed: " + e.getMessage());
+    } catch (ApiException | NoApiKeyException | InputRequiredException e) {
+      System.err.println("Failed: " + e.getMessage());
+    }
+  }
+
+  /** Demonstrates custom parameter settings. */
+  public static void customParameters() {
+    System.out.println("\n=== Custom Parameters Example ===");
+
+    HttpSpeechSynthesizer tts = new HttpSpeechSynthesizer();
+
+    // Same request as the other examples, plus explicit volume, rate and pitch
+    HttpSpeechSynthesisParam settings =
+        HttpSpeechSynthesisParam.builder()
+            .model("cosyvoice-v3-flash")
+            .text("这是一段测试语音合成参数的文本。")
+            .voice("longanyang")
+            .format("wav")
+            .sampleRate(24000)
+            .volume(80) // Volume: 0-100
+            .rate(1.2f) // Speech rate: 0.5-2.0
+            .pitch(1.1f) // Pitch: 0.5-2.0
+            .build();
+
+    System.out.println("Parameters:");
+    System.out.println("  Model: " + settings.getModel());
+    System.out.println("  Text: " + settings.getText());
+    System.out.println("  Voice: " + settings.getVoice());
+    System.out.println("  Format: " + settings.getFormat());
+    System.out.println("  Sample Rate: " + settings.getSampleRate());
+    System.out.println("  Volume: " + settings.getVolume());
+    System.out.println("  Rate: " + settings.getRate());
+    System.out.println("  Pitch: " + settings.getPitch());
+
+    try {
+      ByteBuffer audio = tts.callAndReturnAudio(settings);
+      if (audio == null) {
+        return;
+      }
+      System.out.println("✓ Synthesis completed, audio size: " + audio.remaining() + " bytes");
+    } catch (ApiException | NoApiKeyException | InputRequiredException e) {
+      System.err.println("Failed: " + e.getMessage());
+    }
+  }
+
+  public static void main(String[] args) {
+    // Use the key from DASHSCOPE_API_KEY (see the class doc) rather than a hard-coded placeholder
+    Constants.apiKey = System.getenv("DASHSCOPE_API_KEY");
+    System.out.println("HttpSpeechSynthesizer Usage Examples\n");
+    System.out.println("====================================\n");
+
+    // Run examples
+    syncCall(); // SSE streaming - returns audio data
+    syncCallWithUrl(); // Non-SSE - returns audio URL
+    streamCallWithCallback();
+    customParameters();
+    System.out.println("\n====================================");
+    System.out.println("All examples completed!");
+  }
+}
diff --git a/samples/Qwen3OmniToolCallUsage.java b/samples/Qwen3OmniToolCallUsage.java
new file mode 100644
index 0000000..634eaec
--- /dev/null
+++ b/samples/Qwen3OmniToolCallUsage.java
@@ -0,0 +1,365 @@
+import com.alibaba.dashscope.audio.omni.*;
+import com.alibaba.dashscope.exception.NoApiKeyException;
+import com.google.gson.Gson;
+import com.google.gson.JsonObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * Qwen3 Omni Tool Calling Support
+ * 
+ * This example demonstrates:
+ * 1. Function calling (tool) support with weather, flight price and train price queries
+ * 2. Using createItem to send tool call results back to the server
+ */
+public class Qwen3OmniToolCallUsage {
+    private static final Logger log = LoggerFactory.getLogger(Qwen3OmniToolCallUsage.class);
+    private static final int AUDIO_CHUNK_SIZE = 6400; // Audio chunk size in bytes (200 ms of 16 kHz, 16-bit mono PCM)
+    private static final int SLEEP_INTERVAL_MS = 200; // Sleep interval to simulate real-time streaming (one chunk of audio)
+
+    // Tool calls received from the server that have not been answered yet, keyed by call_id
+    private static final Map<String, JsonObject> pendingToolCalls = new ConcurrentHashMap<>();
+
+    /** Streams ./weather.wav to the realtime service and answers the model's tool calls locally. */
+    public static void main(String[] args) throws InterruptedException {
+        // Build connection parameters
+        OmniRealtimeParam param = OmniRealtimeParam.builder()
+                .model("model-name") // Replace with your model
+                .apikey("your-api-key")
+                .url("wss://dashscope.aliyuncs.com/api-ws/v1/realtime") // Custom URL if needed
+                .build();
+
+        final AtomicReference<StringBuilder> responseTextRef = new AtomicReference<>(new StringBuilder());
+        final CountDownLatch finishLatch = new CountDownLatch(1);
+
+        // Create conversation with callback
+        OmniRealtimeConversation conversation = new OmniRealtimeConversation(param, new OmniRealtimeCallback() {
+            private long lastPackageTime = 0;
+            private boolean isFirstText = true;
+            private boolean isFirstAudio = true;
+
+            @Override
+            public void onOpen() {
+                System.out.println("connection opened, ready to send audio");
+                lastPackageTime = System.currentTimeMillis();
+            }
+
+            @Override
+            public void onEvent(JsonObject message) {
+                String type = message.get("type").getAsString();
+                switch (type) {
+                    case "session.created":
+                        System.out.println("start session: " + message.get("session").getAsJsonObject().get("id").getAsString());
+                        break;
+
+                    case "conversation.item.input_audio_transcription.completed":
+                        System.out.println("question: " + message.get("transcript").getAsString());
+                        break;
+
+                    case "response.audio_transcript.delta":
+                    case "response.text.delta":
+                        if (isFirstText) {
+                            isFirstText = false;
+                            System.out.println("first text latency from vad end: " + (System.currentTimeMillis() - lastPackageTime) + " ms");
+                        }
+                        String text = message.get("delta").getAsString();
+                        responseTextRef.get().append(text);
+                        break;
+
+                    case "response.audio.delta":
+                        if (isFirstAudio) {
+                            isFirstAudio = false;
+                            System.out.println("first audio latency from vad end: " + (System.currentTimeMillis() - lastPackageTime) + " ms");
+                        }
+                        System.out.println("audio interval: " + (System.currentTimeMillis() - lastPackageTime) + " ms");
+                        lastPackageTime = System.currentTimeMillis();
+                        String recvAudioB64 = message.get("delta").getAsString();
+                        // Handle received audio - implement your own audio player here
+                        // audioPlayer.write(recvAudioB64);
+                        break;
+
+                    case "input_audio_buffer.speech_started":
+                        System.out.println("======VAD Speech Start======");
+                        // Cancel audio playback when user starts speaking
+                        // audioPlayer.cancelPlaying();
+                        break;
+
+                    case "input_audio_buffer.speech_stopped":
+                        System.out.println("======VAD Speech End======");
+                        lastPackageTime = System.currentTimeMillis();
+                        isFirstText = true;
+                        isFirstAudio = true;
+                        pendingToolCalls.clear();
+                        break;
+
+                    case "response.function_call_arguments.done":
+                        System.out.println("======TOOL CALL======");
+                        String toolCallId = message.get("call_id").getAsString();
+                        pendingToolCalls.put(toolCallId, message);
+                        break;
+
+                    case "response.done":
+                        System.out.println("======RESPONSE DONE======");
+                        System.out.println("all response text: " + responseTextRef.get());
+                        responseTextRef.set(new StringBuilder()); // Clear for next response
+                        break;
+
+                    default:
+                        break;
+                }
+            }
+
+            @Override
+            public void onClose(int code, String reason) {
+                System.out.println("connection closed with code: " + code + ", reason: " + reason);
+                finishLatch.countDown();
+            }
+        });
+
+        try {
+            conversation.connect();
+        } catch (NoApiKeyException e) {
+            throw new RuntimeException(e);
+        }
+
+        // Build tools definition
+        List<Map<String, Object>> tools = buildTools();
+
+        // Configure session with tools and server VAD
+        Map<String, Object> extraParams = new HashMap<>();
+        extraParams.put("tools", tools);
+
+        OmniRealtimeConfig config = OmniRealtimeConfig.builder()
+                .modalities(Arrays.asList(OmniRealtimeModality.AUDIO, OmniRealtimeModality.TEXT))
+                .voice("Ethan") // Voice name
+                .inputAudioFormat(OmniRealtimeAudioFormat.PCM_16000HZ_MONO_16BIT)
+                .outputAudioFormat(OmniRealtimeAudioFormat.PCM_24000HZ_MONO_16BIT)
+                .enableInputAudioTranscription(true)
+                .InputAudioTranscription("gummy-realtime-v1") // Transcription model
+                .enableTurnDetection(true)
+                .turnDetectionType("server_vad")
+                .parameters(extraParams) // Pass tools through extra parameters
+                .build();
+
+        conversation.updateSession(config);
+
+        System.out.println("Press 'Ctrl+C' to stop conversation...");
+
+        // Main loop - read audio from file and send to server
+        // In a real application, you would read from microphone
+        String filePath = "./weather.wav";
+        File audioFile = new File(filePath);
+        if (!audioFile.exists()) {
+            log.error("Audio file not found: {}", filePath);
+            System.out.println("Waiting for interactive session. Press Ctrl+C to exit.");
+            // For demo purposes, just wait
+            finishLatch.await();
+            return;
+        }
+
+        try (FileInputStream audioInputStream = new FileInputStream(audioFile)) {
+            byte[] audioBuffer = new byte[AUDIO_CHUNK_SIZE];
+            int bytesRead;
+            log.info("Starting to send audio data from: {}", filePath);
+            while ((bytesRead = audioInputStream.read(audioBuffer)) != -1) {
+                // Check and handle pending tool calls
+                if (handlePendingToolCalls(conversation)) {
+                    System.out.println("*** create response after call tools");
+                    conversation.createResponse(null, Arrays.asList(OmniRealtimeModality.AUDIO, OmniRealtimeModality.TEXT));
+                    System.out.println("======TOOL CALL END======");
+                }
+                // Send audio data
+                String audioB64 = Base64.getEncoder().encodeToString(Arrays.copyOf(audioBuffer, bytesRead));
+                conversation.appendAudio(audioB64);
+                // Add small delay to simulate real-time audio streaming
+                Thread.sleep(SLEEP_INTERVAL_MS);
+            }
+            log.info("Finished sending audio data.");
+
+            // Tool calls can arrive after the last chunk was sent, so keep answering them for 5 more seconds
+            long drainDeadline = System.currentTimeMillis() + 5 * 1000;
+            while (System.currentTimeMillis() < drainDeadline) {
+                if (handlePendingToolCalls(conversation)) {
+                    System.out.println("*** create response after call tools");
+                    conversation.createResponse(null, Arrays.asList(OmniRealtimeModality.AUDIO, OmniRealtimeModality.TEXT));
+                    System.out.println("======TOOL CALL END======");
+                }
+                Thread.sleep(SLEEP_INTERVAL_MS);
+            }
+        } catch (Exception e) {
+            log.error("Error sending audio from file: {}", filePath, e);
+        }
+        conversation.close(1000, "bye");
+        finishLatch.await();
+        System.exit(0);
+    }
+
+    /**
+     * Build tool definitions in OpenAI format
+     */
+    private static List<Map<String, Object>> buildTools() {
+        // Every tool follows the same layout, so each one is declared as its parameter schemas
+        // plus a call to functionTool(...), which wraps them into the function definition.
+        List<Map<String, Object>> tools = new ArrayList<>();
+
+        // Tool: get_current_weather
+        Map<String, Object> weatherProps = new HashMap<>();
+        weatherProps.put("location", stringProperty("城市或县区，比如北京市、杭州市、余杭区等。"));
+        tools.add(functionTool("get_current_weather", "当你想查询指定城市的天气时非常有用。",
+                weatherProps, Collections.singletonList("location")));
+
+        // Tool: get_flight_price
+        Map<String, Object> flightProps = new HashMap<>();
+        flightProps.put("src", stringProperty("飞机起飞的城市，比如北京市、杭州市等。"));
+        flightProps.put("dst", stringProperty("飞机降落的城市，比如北京市、杭州市区等。"));
+        tools.add(functionTool("get_flight_price", "当你想查询飞机票价格时非常有用。",
+                flightProps, Arrays.asList("src", "dst")));
+
+        // Tool: get_train_price
+        Map<String, Object> trainProps = new HashMap<>();
+        trainProps.put("src", stringProperty("火车出发的城市，比如北京市、杭州市等。"));
+        trainProps.put("dst", stringProperty("火车到达的城市，比如北京市、杭州市区等。"));
+        tools.add(functionTool("get_train_price", "当你想查询火车票价格时非常有用。",
+                trainProps, Arrays.asList("src", "dst")));
+
+        return tools;
+    }
+
+    /**
+     * Builds the schema entry for one string-typed tool parameter.
+     *
+     * @param description text telling the model what the parameter means
+     * @return a new map holding the keys {@code type} and {@code description}
+     */
+    private static Map<String, Object> stringProperty(String description) {
+        Map<String, Object> property = new HashMap<>();
+        property.put("type", "string");
+        property.put("description", description);
+        return property;
+    }
+
+    /**
+     * Builds one function-tool definition.
+     *
+     * @param name function name the model uses to request the tool
+     * @param description text telling the model when the tool is useful
+     * @param properties parameter name mapped to its schema entry
+     * @param required names of the parameters listed as required
+     * @return a new map holding {@code type} and the nested {@code function} definition
+     */
+    private static Map<String, Object> functionTool(
+            String name, String description, Map<String, Object> properties, List<String> required) {
+        // Schema of the arguments object
+        Map<String, Object> parameters = new HashMap<>();
+        parameters.put("type", "object");
+        parameters.put("properties", properties);
+        parameters.put("required", required);
+
+        // The function itself: name, description and argument schema
+        Map<String, Object> function = new HashMap<>();
+        function.put("name", name);
+        function.put("description", description);
+        function.put("parameters", parameters);
+
+        // Outer wrapper marking the entry as a function tool
+        Map<String, Object> tool = new HashMap<>();
+        tool.put("type", "function");
+        tool.put("function", function);
+        return tool;
+    }
+
+    /**
+     * Handle pending tool calls by executing local functions and sending results back
+     */
+    private static boolean handlePendingToolCalls(OmniRealtimeConversation conversation) {
+        int answered = 0;
+        Iterator<Map.Entry<String, JsonObject>> pending = pendingToolCalls.entrySet().iterator();
+        while (pending.hasNext()) {
+            // Run the requested local function for this call
+            JsonObject toolCall = pending.next().getValue();
+            JsonObject toolResult = handleToolCall(toolCall);
+
+            // Report the outcome to the server via createItem
+            sendToolCallResult(conversation, toolResult);
+
+            // Answered: drop the call from the pending map
+            pending.remove();
+            answered++;
+        }
+
+        return answered > 0;
+    }
+
+    /**
+     * Handle a single tool call and return the result
+     */
+    private static JsonObject handleToolCall(JsonObject toolCallResponse) {
+        String functionName = toolCallResponse.get("name").getAsString();
+        String output;
+        try {
+            // The arguments are JSON text produced by the model, so they may be malformed or incomplete
+            JsonObject arguments = new Gson().fromJson(toolCallResponse.get("arguments").getAsString(), JsonObject.class);
+            System.out.println("[Tool Call] start handling tool call: name: " + functionName + ", args: " + arguments);
+
+            switch (functionName) {
+                case "get_current_weather":
+                    output = getCurrentWeather(arguments.get("location").getAsString());
+                    break;
+                case "get_flight_price":
+                    output = getFlightPrice(arguments.get("src").getAsString(), arguments.get("dst").getAsString());
+                    break;
+                case "get_train_price":
+                    output = getTrainPrice(arguments.get("src").getAsString(), arguments.get("dst").getAsString());
+                    break;
+                default:
+                    output = "client没有找到这个工具，调用失败。";
+                    break;
+            }
+        } catch (RuntimeException e) {
+            // Answer with a failure text rather than letting the exception abort the caller's audio loop
+            log.error("Tool call {} failed", functionName, e);
+            output = "工具调用失败：参数缺失或格式错误。";
+        }
+        System.out.println("[Tool Call] tool call response: " + output);
+
+        // Build result object: the call_id ties the output to the request it answers
+        JsonObject result = new JsonObject();
+        result.addProperty("call_id", toolCallResponse.get("call_id").getAsString());
+        result.addProperty("output", output);
+        return result;
+    }
+
+    /**
+     * Send tool call result back to server using createItem
+     */
+    private static void sendToolCallResult(OmniRealtimeConversation conversation, JsonObject result) {
+        String itemId = "item_" + UUID.randomUUID().toString().replace("-", "");
+        JsonObject outputItem = new JsonObject(); // function_call_output item answering one tool call
+        outputItem.addProperty("id", itemId);
+        outputItem.addProperty("type", "function_call_output");
+        outputItem.addProperty("call_id", result.get("call_id").getAsString());
+        outputItem.addProperty("output", result.get("output").getAsString());
+        conversation.createItem(outputItem);
+    }
+
+    // ===== Local tool implementations =====
+
+    private static String getCurrentWeather(String location) {
+        return String.valueOf(location).concat("今天天气为霾转晴，气温4/-4℃，微风"); // canned reply, no real lookup
+    }
+
+    private static String getFlightPrice(String src, String dst) {
+        return String.join("", src, "到", dst, "的机票价格为200~300美元。"); // canned reply, no real lookup
+    }
+
+    private static String getTrainPrice(String src, String dst) {
+        return "invalid apikey error"; // NOTE(review): ignores src/dst; presumably a deliberate failing-tool stub - confirm
+    }
+}
diff --git a/src/main/java/com/alibaba/dashscope/audio/http_tts/AudioInfo.java b/src/main/java/com/alibaba/dashscope/audio/http_tts/AudioInfo.java
new file mode 100644
index 0000000..f47867f
--- /dev/null
+++ b/src/main/java/com/alibaba/dashscope/audio/http_tts/AudioInfo.java
@@ -0,0 +1,63 @@
+// Copyright (c) Alibaba, Inc. and its affiliates.
+
+package com.alibaba.dashscope.audio.http_tts;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+/**
+ * Audio information containing URL and metadata for non-SSE synthesis results. When using non-SSE
+ * synchronous call, the audio is returned as a URL instead of binary data.
+ *
+ * @author DashScope SDK Team
+ */
+@Data
+@EqualsAndHashCode
+public class AudioInfo {
+
+  /** The audio URL for downloading the synthesized audio file. */
+  private String url;
+
+  /** The unique identifier for this audio file. */
+  private String id;
+
+  /** The expiration timestamp (Unix timestamp in seconds) for the URL. */
+  private Long expiresAt;
+
+  /** The audio data in base64 format (if available). */
+  private String data;
+
+  /**
+   * Checks if this audio info has a valid URL.
+   *
+   * @return true if URL is available, false otherwise
+   */
+  public boolean hasUrl() {
+    return url != null && !url.isEmpty();
+  }
+
+  /**
+   * Checks if the URL has expired, i.e. the current time has reached the expiration timestamp.
+   *
+   * @return true if expired, false if still valid or expiration unknown
+   */
+  public boolean isExpired() {
+    // Expired as soon as the current second reaches expiresAt: this is the same boundary at
+    // which getRemainingSeconds() stops reporting a positive value, so the two methods agree.
+    long nowSeconds = System.currentTimeMillis() / 1000;
+    return expiresAt != null && nowSeconds >= expiresAt;
+  }
+
+  /**
+   * Gets the remaining time before URL expiration in seconds.
+   *
+   * @return remaining seconds (always positive), or -1 if expiration unknown or already expired
+   */
+  public long getRemainingSeconds() {
+    if (expiresAt == null) {
+      return -1;
+    }
+    long remaining = expiresAt - System.currentTimeMillis() / 1000;
+    return remaining > 0 ? remaining : -1;
+  }
+}
diff --git a/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesisParam.java b/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesisParam.java
new file mode 100644
index 0000000..0c9af31
--- /dev/null
+++ b/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesisParam.java
@@ -0,0 +1,110 @@
+// Copyright (c) Alibaba, Inc. and its affiliates.
+
+package com.alibaba.dashscope.audio.http_tts;
+
+import com.alibaba.dashscope.base.HalfDuplexServiceParam;
+import com.alibaba.dashscope.exception.InputRequiredException;
+import com.google.gson.JsonObject;
+import java.nio.ByteBuffer;
+import lombok.*;
+import lombok.experimental.SuperBuilder;
+
+/**
+ * HTTP TTS (Text-to-Speech) synthesis parameter class. Supports HTTP SSE-based speech synthesis API
+ * calls for models like CosyVoice.
+ *
+ * <p>Example usage:
+ *
+ * <pre>{@code
+ * HttpSpeechSynthesisParam param = HttpSpeechSynthesisParam.builder()
+ *     .model("cosyvoice-v3-flash")
+ *     .text("你好，欢迎使用语音合成服务。")
+ *     .voice("longanyang")
+ *     .format("wav")
+ *     .sampleRate(24000)
+ *     .build();
+ * }</pre>
+ *
+ * @author DashScope SDK Team
+ */
+@Data
+@SuperBuilder
+@EqualsAndHashCode(callSuper = true)
+public class HttpSpeechSynthesisParam extends HalfDuplexServiceParam {
+
+  /** The text to be synthesized into speech. */
+  @NonNull private String text;
+
+  /** The voice name for synthesis (e.g., "longanyang", "longxiaochun"). */
+  private String voice;
+
+  /** The audio format (e.g., "wav", "mp3", "pcm"). */
+  @Builder.Default private String format = "wav";
+
+  /** The sample rate in Hz (e.g., 8000, 16000, 24000, 48000). */
+  @Builder.Default private Integer sampleRate = 16000;
+
+  /** The audio volume (0-100). */
+  @Builder.Default private Integer volume = 50;
+
+  /** The speech rate (0.5-2.0). */
+  @Builder.Default private Float rate = 1.0f;
+
+  /** The pitch rate (0.5-2.0). */
+  @Builder.Default private Float pitch = 1.0f;
+
+  @Override
+  public JsonObject getHttpBody() {
+    // "input" carries the text plus every optional setting that currently has a value
+    JsonObject input = new JsonObject();
+    input.addProperty("text", text);
+    addIfNotEmpty(input, "voice", voice);
+    addIfNotEmpty(input, "format", format);
+    addIfNotNull(input, "sample_rate", sampleRate);
+    addIfNotNull(input, "volume", volume);
+    addIfNotNull(input, "rate", rate);
+    addIfNotNull(input, "pitch", pitch);
+
+    JsonObject body = new JsonObject();
+    body.addProperty("model", getModel());
+    body.add("input", input);
+    return body;
+  }
+
+  /** Adds a string property to {@code target} unless the value is null or empty. */
+  private static void addIfNotEmpty(JsonObject target, String name, String value) {
+    if (value != null && !value.isEmpty()) {
+      target.addProperty(name, value);
+    }
+  }
+
+  /** Adds a numeric property to {@code target} unless the value is null. */
+  private static void addIfNotNull(JsonObject target, String name, Number value) {
+    if (value != null) {
+      target.addProperty(name, value);
+    }
+  }
+
+  @Override
+  public Object getInput() {
+    JsonObject textOnly = new JsonObject();
+    textOnly.addProperty("text", text);
+    return textOnly;
+  }
+
+  @Override
+  public ByteBuffer getBinaryData() {
+    return null; // always null for this parameter type
+  }
+
+  @Override
+  public void validate() throws InputRequiredException {
+    if (text == null || text.trim().isEmpty()) {
+      throw new InputRequiredException("text is required and cannot be empty");
+    }
+    String modelName = getModel();
+    if (modelName == null || modelName.trim().isEmpty()) {
+      throw new InputRequiredException("model is required");
+    }
+  }
+}
diff --git a/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesisResult.java b/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesisResult.java
new file mode 100644
index 0000000..0f733c1
--- /dev/null
+++ b/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesisResult.java
@@ -0,0 +1,75 @@
+// Copyright (c) Alibaba, Inc. and its affiliates.
+
+package com.alibaba.dashscope.audio.http_tts;
+
+import com.alibaba.dashscope.audio.tts.SpeechSynthesisUsage;
+import com.google.gson.JsonObject;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+/**
+ * Holds what one HTTP TTS response carried: the audio itself or a link to it, plus metadata.
+ *
+ * <p>SSE streaming calls deliver raw audio bytes in {@link #audioData}; non-SSE synchronous calls
+ * deliver a download URL inside {@link #audioInfo}.
+ *
+ * @author DashScope SDK Team
+ */
+@Data
+@EqualsAndHashCode
+public class HttpSpeechSynthesisResult {
+
+  /** Request ID, for tracing the call. */
+  private String requestId;
+
+  /** Raw audio bytes (SSE streaming calls). */
+  private byte[] audioData;
+
+  /** Audio URL and its metadata (non-SSE synchronous calls). */
+  private AudioInfo audioInfo;
+
+  /** Usage statistics, when the response included them. */
+  private SpeechSynthesisUsage usage;
+
+  /** Raw output object from the API (may contain additional metadata). */
+  private JsonObject output;
+
+  /** Finish reason (e.g., "stop"). */
+  private String finishReason;
+
+  /**
+   * Tells whether this result carries audio bytes (SSE mode).
+   *
+   * @return true if the audio data is non-null and non-empty, false otherwise
+   */
+  public boolean hasAudioData() {
+    return !(audioData == null || audioData.length == 0);
+  }
+
+  /**
+   * Reports how many bytes of audio this result carries.
+   *
+   * @return the size in bytes, or 0 if no audio data is present
+   */
+  public int getAudioDataSize() {
+    return audioData == null ? 0 : audioData.length;
+  }
+
+  /**
+   * Tells whether this result carries an audio URL (non-SSE mode).
+   *
+   * @return true if audio info is set and reports a URL, false otherwise
+   */
+  public boolean hasAudioUrl() {
+    return audioInfo == null ? false : audioInfo.hasUrl();
+  }
+
+  /**
+   * Returns the audio URL.
+   *
+   * @return the audio URL, or null if not available
+   */
+  public String getAudioUrl() {
+    return audioInfo == null ? null : audioInfo.getUrl();
+  }
+}
diff --git a/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesizer.java b/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesizer.java
new file mode 100644
index 0000000..1e7133d
--- /dev/null
+++ b/src/main/java/com/alibaba/dashscope/audio/http_tts/HttpSpeechSynthesizer.java
@@ -0,0 +1,403 @@
+// Copyright (c) Alibaba, Inc. and its affiliates.
+
+package com.alibaba.dashscope.audio.http_tts;
+
+import com.alibaba.dashscope.api.SynchronizeHalfDuplexApi;
+import com.alibaba.dashscope.audio.tts.SpeechSynthesisUsage;
+import com.alibaba.dashscope.common.*;
+import com.alibaba.dashscope.common.Status;
+import com.alibaba.dashscope.exception.ApiException;
+import com.alibaba.dashscope.exception.InputRequiredException;
+import com.alibaba.dashscope.exception.NoApiKeyException;
+import com.alibaba.dashscope.protocol.ApiServiceOption;
+import com.alibaba.dashscope.protocol.HttpMethod;
+import com.alibaba.dashscope.protocol.Protocol;
+import com.alibaba.dashscope.protocol.StreamingMode;
+import com.alibaba.dashscope.utils.JsonUtils;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import io.reactivex.Flowable;
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.util.Base64;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * HTTP-based speech synthesizer. Provides blocking and callback-based text-to-speech synthesis
+ * over Server-Sent Events (SSE), plus a non-SSE call that returns a download URL for the audio.
+ *
+ * <p>Supports models like CosyVoice (cosyvoice-v3-flash, etc.) that use HTTP SSE for streaming
+ * synthesis.
+ *
+ * @author songsong.sss
+ */
+@Slf4j
+public class HttpSpeechSynthesizer {
+
+  private final ThreadLocal<ByteBuffer> accumulatedAudioData = new ThreadLocal<>();
+
+  /** Creates a new HttpSpeechSynthesizer instance with default settings. */
+  public HttpSpeechSynthesizer() {}
+
+  public ByteBuffer getAccumulatedAudioData() {
+    return accumulatedAudioData.get(); // thread-local: the calling thread's buffer, or null
+  }
+
+  /**
+   * Builds a new text-to-speech API client for a single request, configured for HTTP POST,
+   * with SSE switched on or off by {@code isSSE}.
+   */
+  private SynchronizeHalfDuplexApi<HttpSpeechSynthesisParam> createApi(boolean isSSE) {
+    return new SynchronizeHalfDuplexApi<>(
+        ApiServiceOption.builder()
+            .protocol(Protocol.HTTP)
+            .httpMethod(HttpMethod.POST)
+            .streamingMode(StreamingMode.OUT)
+            .outputMode(OutputMode.ACCUMULATE)
+            .taskGroup(TaskGroup.AUDIO.getValue())
+            .task(Task.TEXT_TO_SPEECH.getValue())
+            .function(Function.SPEECH_SYNTHESIZER.getValue())
+            .isSSE(isSSE)
+            .build());
+  }
+
+  /**
+   * Runs a synthesis over SSE and returns all of the audio in one buffer. The calling thread
+   * blocks until the stream has ended.
+   *
+   * <p>The returned buffer is also stored for the calling thread and can be read again through
+   * {@link #getAccumulatedAudioData()}.
+   *
+   * @param param The synthesis parameters
+   * @return ByteBuffer containing the complete audio data
+   * @throws ApiException If the API call fails
+   * @throws NoApiKeyException If the API key is not configured
+   * @throws InputRequiredException If required parameters are missing
+   */
+  public ByteBuffer callAndReturnAudio(HttpSpeechSynthesisParam param)
+      throws ApiException, NoApiKeyException, InputRequiredException {
+    param.validate();
+    accumulatedAudioData.remove();
+
+    ByteArrayOutputStream collected = new ByteArrayOutputStream();
+    SynchronizeHalfDuplexApi<HttpSpeechSynthesisParam> sseApi = createApi(true);
+
+    try {
+      // Each streamed result contributes its audio bytes (if any) to the collector.
+      sseApi
+          .streamCall(param)
+          .blockingForEach(chunk -> processAudioResult(chunk, collected));
+
+      // The stream has ended: publish the collected bytes and hand them back.
+      ByteBuffer wholeAudio = ByteBuffer.wrap(collected.toByteArray());
+      accumulatedAudioData.set(wholeAudio);
+      return wholeAudio;
+    } catch (ApiException | NoApiKeyException e) {
+      // Already one of the declared types: pass it through untouched.
+      throw e;
+    } catch (Exception e) {
+      log.error("Speech synthesis failed", e);
+      throw new ApiException(e);
+    }
+  }
+
+  /**
+   * Synchronous call without SSE - returns a result containing the audio URL. This is a simpler,
+   * faster call that returns a download URL instead of streaming audio data.
+   *
+   * <p>Use this method when you want to get the audio URL and download it later.
+   *
+   * <p>Example response:
+   *
+   * <pre>{@code
+   * {
+   *   "request_id": "xxx",
+   *   "output": {
+   *     "finish_reason": "stop",
+   *     "audio": {
+   *       "url": "http://dashscope-result-bj.oss-cn-beijing.aliyuncs.com/...",
+   *       "id": "audio_xxx",
+   *       "expires_at": 1772697707
+   *     }
+   *   },
+   *   "usage": { "characters": 15 }
+   * }
+   * }</pre>
+   *
+   * @param param The synthesis parameters
+   * @return HttpSpeechSynthesisResult containing audio URL and metadata
+   * @throws ApiException If the API call fails; unexpected failures are wrapped in one
+   * @throws NoApiKeyException If the API key is not configured
+   * @throws InputRequiredException If required parameters are missing
+   */
+  public HttpSpeechSynthesisResult call(HttpSpeechSynthesisParam param)
+      throws ApiException, NoApiKeyException, InputRequiredException {
+    param.validate();
+    accumulatedAudioData.remove();
+    SynchronizeHalfDuplexApi<HttpSpeechSynthesisParam> api = createApi(false);
+    try {
+      return convertNonSSEResult(api.call(param));
+    } catch (ApiException | NoApiKeyException e) {
+      throw e; // keep the original type and status, as callAndReturnAudio does
+    } catch (Exception e) {
+      log.error("Synchronous speech synthesis failed", e);
+      throw new ApiException(e);
+    }
+  }
+
+  /**
+   * Streaming call with callback interface. Results are delivered through the callback as they
+   * arrive. Exceptions of the declared types raised while starting the call are rethrown as-is.
+   *
+   * @param param The synthesis parameters
+   * @param callback The callback to receive synthesis results
+   * @throws ApiException If the API call fails
+   * @throws NoApiKeyException If the API key is not configured
+   * @throws InputRequiredException If required parameters are missing
+   */
+  public void streamCall(
+      HttpSpeechSynthesisParam param, ResultCallback<HttpSpeechSynthesisResult> callback)
+      throws ApiException, NoApiKeyException, InputRequiredException {
+    param.validate();
+    accumulatedAudioData.remove();
+    SynchronizeHalfDuplexApi<HttpSpeechSynthesisParam> api = createApi(true);
+    ByteArrayOutputStream audioBuffer = new ByteArrayOutputStream();
+    try {
+      api.streamCall(
+          param,
+          new ResultCallback<DashScopeResult>() {
+            @Override
+            public void onEvent(DashScopeResult message) {
+              try {
+                HttpSpeechSynthesisResult result = convertResult(message);
+                if (result.getAudioData() != null) {
+                  audioBuffer.write(result.getAudioData());
+                }
+                callback.onEvent(result);
+              } catch (Exception e) {
+                log.error("Failed to process audio result", e);
+                callback.onError(e);
+              }
+            }
+            // NOTE(review): the ThreadLocal below is set on whichever thread runs onComplete.
+            @Override
+            public void onComplete() {
+              try {
+                accumulatedAudioData.set(ByteBuffer.wrap(audioBuffer.toByteArray()));
+                callback.onComplete();
+              } catch (Exception e) {
+                log.error("Failed to complete synthesis", e);
+                callback.onError(e);
+              }
+            }
+
+            @Override
+            public void onError(Exception e) {
+              callback.onError(e);
+            }
+          });
+    } catch (ApiException | NoApiKeyException e) {
+      throw e; // keep the original type and status, as callAndReturnAudio does
+    } catch (Exception e) {
+      log.error("Streaming call failed", e);
+      throw new ApiException(e);
+    }
+  }
+
+  /**
+   * Placeholder for the first package delay (time from request to first audio data). Nothing is
+   * measured yet, so the value is the same before and after a call.
+   *
+   * @return always -1, meaning "not available"
+   */
+  public long getFirstPackageDelay() {
+    // Not implemented: this would require timestamp tracking during the call
+    return -1;
+  }
+
+  /** Appends one result's audio bytes to the buffer; failures are logged and not rethrown. */
+  private void processAudioResult(DashScopeResult result, ByteArrayOutputStream audioBuffer) {
+    if (result == null) {
+      return;
+    }
+    try {
+      byte[] chunk = extractAudioData(result);
+      boolean hasBytes = chunk != null && chunk.length > 0;
+      if (hasBytes) {
+        audioBuffer.write(chunk);
+      }
+    } catch (Exception e) {
+      log.error("Failed to extract audio data from result", e);
+    }
+  }
+
+  /** Converts one streamed DashScopeResult; an error response becomes an ApiException. */
+  private HttpSpeechSynthesisResult convertResult(DashScopeResult dashScopeResult) {
+    // A non-empty code marks an error response; surface it as an ApiException.
+    String errorCode = dashScopeResult.getCode();
+    if (errorCode != null && !errorCode.isEmpty()) {
+      String detail = dashScopeResult.getMessage();
+      if (detail == null) {
+        detail = "Unknown error";
+      }
+      throw new ApiException(
+          Status.builder()
+              .statusCode(
+                  dashScopeResult.getStatusCode() != null ? dashScopeResult.getStatusCode() : 400)
+              .code(errorCode)
+              .message(detail)
+              .requestId(dashScopeResult.getRequestId())
+              .build());
+    }
+
+    HttpSpeechSynthesisResult converted = new HttpSpeechSynthesisResult();
+
+    // Both fields start out null, so copying a null value over leaves them unchanged.
+    converted.setRequestId(dashScopeResult.getRequestId());
+    converted.setAudioData(extractAudioData(dashScopeResult));
+
+    // Usage is optional; a payload that cannot be parsed is skipped.
+    if (dashScopeResult.getUsage() != null) {
+      try {
+        SpeechSynthesisUsage parsedUsage =
+            JsonUtils.fromJsonObject(
+                dashScopeResult.getUsage().getAsJsonObject(), SpeechSynthesisUsage.class);
+        converted.setUsage(parsedUsage);
+      } catch (Exception e) {
+        log.debug("Failed to parse usage information", e);
+      }
+    }
+
+    // The raw output is kept only when it is a JSON object.
+    Object rawOutput = dashScopeResult.getOutput();
+    if (rawOutput instanceof JsonObject) {
+      converted.setOutput((JsonObject) rawOutput);
+    }
+
+    return converted;
+  }
+
+  /**
+   * Extracts audio bytes from a response. Base64 text is looked for in output.audio,
+   * output.audio.data and output.binary, then a ByteBuffer output; null means nothing decoded.
+   */
+  private byte[] extractAudioData(DashScopeResult result) {
+    if (result == null) {
+      return null;
+    }
+
+    // Try to get audio from output (Base64 encoded)
+    if (result.getOutput() != null && result.getOutput() instanceof JsonObject) {
+      JsonObject output = (JsonObject) result.getOutput();
+
+      // Try common audio field names
+      if (output.has("audio")) {
+        JsonElement audioElement = output.get("audio");
+        // audio could be a Base64 string or a JSON object with data field
+        if (audioElement.isJsonPrimitive()) {
+          String audioBase64 = audioElement.getAsString();
+          if (audioBase64 != null && !audioBase64.isEmpty()) {
+            try {
+              return Base64.getDecoder().decode(audioBase64);
+            } catch (IllegalArgumentException e) {
+              log.warn("Failed to decode Base64 audio data", e);
+            }
+          }
+        } else if (audioElement.isJsonObject()) {
+          // audio is an object with fields like url, id, data
+          JsonObject audioObj = audioElement.getAsJsonObject();
+          if (audioObj.has("data") && !audioObj.get("data").isJsonNull()) {
+            String audioBase64 = audioObj.get("data").getAsString();
+            if (audioBase64 != null && !audioBase64.isEmpty()) {
+              try {
+                return Base64.getDecoder().decode(audioBase64);
+              } catch (IllegalArgumentException e) {
+                log.warn("Failed to decode Base64 audio data from audio.data", e);
+              }
+            }
+          }
+        }
+      }
+
+      // Some APIs may return audio in binary field; an explicit JSON null is skipped like audio.data
+      if (output.has("binary") && !output.get("binary").isJsonNull()) {
+        String binaryBase64 = output.get("binary").getAsString();
+        if (binaryBase64 != null && !binaryBase64.isEmpty()) {
+          try {
+            return Base64.getDecoder().decode(binaryBase64);
+          } catch (IllegalArgumentException e) {
+            log.warn("Failed to decode Base64 binary data", e);
+          }
+        }
+      }
+    }
+
+    // Check if output is ByteBuffer (WebSocket-style)
+    if (result.getOutput() instanceof ByteBuffer) {
+      ByteBuffer buffer = (ByteBuffer) result.getOutput();
+      byte[] data = new byte[buffer.remaining()];
+      buffer.get(data); // note: this advances the position of the buffer held by the result
+      return data;
+    }
+
+    return null;
+  }
+
+  /**
+   * Converts DashScopeResult from non-SSE call to HttpSpeechSynthesisResult. Non-SSE call returns
+   * audio URL instead of binary data. Fields that are absent or JSON null are left unset.
+   */
+  private HttpSpeechSynthesisResult convertNonSSEResult(DashScopeResult dashScopeResult) {
+    HttpSpeechSynthesisResult result = new HttpSpeechSynthesisResult();
+
+    if (dashScopeResult.getRequestId() != null) {
+      result.setRequestId(dashScopeResult.getRequestId());
+    }
+
+    // Parse output for audio URL information
+    if (dashScopeResult.getOutput() != null && dashScopeResult.getOutput() instanceof JsonObject) {
+      JsonObject output = (JsonObject) dashScopeResult.getOutput();
+      result.setOutput(output);
+
+      // Parse finish_reason (an explicit null would make getAsString throw)
+      if (output.has("finish_reason") && !output.get("finish_reason").isJsonNull()) {
+        result.setFinishReason(output.get("finish_reason").getAsString());
+      }
+
+      // Parse audio object (contains url, id, expires_at); each field tolerates JSON null
+      if (output.has("audio") && output.get("audio").isJsonObject()) {
+        JsonObject audio = output.getAsJsonObject("audio");
+        AudioInfo audioInfo = new AudioInfo();
+
+        if (audio.has("url") && !audio.get("url").isJsonNull()) {
+          audioInfo.setUrl(audio.get("url").getAsString());
+        }
+        if (audio.has("id") && !audio.get("id").isJsonNull()) {
+          audioInfo.setId(audio.get("id").getAsString());
+        }
+        if (audio.has("expires_at") && !audio.get("expires_at").isJsonNull()) {
+          audioInfo.setExpiresAt(audio.get("expires_at").getAsLong());
+        }
+        if (audio.has("data") && !audio.get("data").isJsonNull()) {
+          audioInfo.setData(audio.get("data").getAsString());
+        }
+
+        result.setAudioInfo(audioInfo);
+      }
+    }
+
+    // Parse usage; a payload that cannot be parsed is skipped rather than failing the call
+    if (dashScopeResult.getUsage() != null) {
+      try {
+        SpeechSynthesisUsage usage =
+            JsonUtils.fromJsonObject(
+                dashScopeResult.getUsage().getAsJsonObject(), SpeechSynthesisUsage.class);
+        result.setUsage(usage);
+      } catch (Exception e) {
+        log.debug("Failed to parse usage information", e);
+      }
+    }
+
+    return result;
+  }
+}
diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java
index dd5169c..f942f2b 100644
--- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java
+++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java
@@ -36,6 +36,7 @@ public class OmniRealtimeConstants {
   public static final String PROTOCOL_EVENT_TYPE_CREATE_RESPONSE = "response.create";
   public static final String PROTOCOL_EVENT_TYPE_CANCEL_RESPONSE = "response.cancel";
   public static final String PROTOCOL_EVENT_TYPE_FINISH_SESSION = "session.finish";
+  public static final String PROTOCOL_EVENT_TYPE_ITEM_CREATE = "conversation.item.create";
   public static final String PROTOCOL_RESPONSE_TYPE_SESSION_CREATED = "session.created";
   public static final String PROTOCOL_RESPONSE_TYPE_RESPONSE_CREATED = "response.created";
   public static final String PROTOCOL_RESPONSE_TYPE_AUDIO_TRANSCRIPT_DELTA =
diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java
index 80178bc..2b2dcfd 100644
--- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java
+++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java
@@ -14,6 +14,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 import lombok.extern.slf4j.Slf4j;
@@ -71,7 +72,9 @@ public void connect() throws NoApiKeyException, InterruptedException {
     client = OkHttpClientFactory.getOkHttpClient();
     websocktetClient = client.newWebSocket(request, this);
     connectLatch.set(new CountDownLatch(1));
-    connectLatch.get().await();
+    if (!connectLatch.get().await(60, TimeUnit.SECONDS)) {
+      throw new RuntimeException("Connection timed out after 60 seconds");
+    }
   }
 
   // block wait server session done, max 20 seconds, then close connection
@@ -128,6 +131,21 @@ public void updateSession(OmniRealtimeConfig config) {
     sendMessage(createGson().toJson(update_request), true);
   }
 
+  /**
+   * Sends a conversation item to the server as a {@code conversation.item.create} event.
+   *
+   * @param item the item to pass to the server
+   */
+  public void createItem(JsonObject item) {
+    checkStatus();
+    String eventType = OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_ITEM_CREATE;
+    Map<String, Object> payload = new HashMap<>();
+    payload.put(OmniRealtimeConstants.PROTOCOL_EVENT_ID, generateEventId());
+    payload.put(OmniRealtimeConstants.PROTOCOL_TYPE, eventType);
+    payload.put("item", item);
+    sendMessage(createGson().toJson(payload), true);
+  }
+
   /**
    * send audio in base64 format
    *
@@ -399,6 +417,7 @@ public void onClosed(WebSocket webSocket, int code, String reason) {
 
   @Override
   public void onFailure(WebSocket webSocket, Throwable t, Response response) {
+    connectLatch.get().countDown(); // unblocks connect(); NOTE(review): it then returns normally
     log.error("WebSocket failed: " + t.getMessage());
   }
 
diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtime.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtime.java
index a4dc9ba..c99bfdd 100644
--- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtime.java
+++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtime.java
@@ -13,6 +13,7 @@
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 import lombok.extern.slf4j.Slf4j;
@@ -68,7 +69,9 @@ public void connect() throws NoApiKeyException, InterruptedException {
     client = OkHttpClientFactory.getOkHttpClient();
     websocktetClient = client.newWebSocket(request, this);
     connectLatch.set(new CountDownLatch(1));
-    connectLatch.get().await();
+    if (!connectLatch.get().await(60, TimeUnit.SECONDS)) {
+      throw new RuntimeException("Connection timed out after 60 seconds");
+    }
   }
 
   /**
@@ -303,6 +306,7 @@ public void onClosing(@NotNull WebSocket webSocket, int code, @NotNull String re
 
   @Override
   public void onFailure(WebSocket webSocket, Throwable t, Response response) {
+    connectLatch.get().countDown(); // unblocks connect(); NOTE(review): it then returns normally
     log.error("WebSocket failed: " + t.getMessage());
   }
 }
 }
diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java
index bb13dae..1519971 100644
--- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java
+++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java
@@ -132,7 +132,7 @@ private void establishWebSocketClient(
                 },
                 BackpressureStrategy.BUFFER);
         // wait for connection establish
-        flowable.blockingSubscribe();
+        flowable.timeout(60, TimeUnit.SECONDS).blockingSubscribe();
         return;
       } catch (Throwable ex) {
         reconnectionTimes += 1;