From 075ca37aceee55ae4af9076e8de01f7cf391ab81 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Sun, 19 Oct 2025 18:04:54 +0800 Subject: [PATCH 01/64] feat(app): support file_list --- samples/ApplicationCalls.java | 49 ++++++++++++++++++- .../alibaba/dashscope/app/AppKeywords.java | 2 + .../dashscope/app/ApplicationOutput.java | 4 ++ .../dashscope/app/ApplicationParam.java | 8 +++ 4 files changed, 61 insertions(+), 2 deletions(-) diff --git a/samples/ApplicationCalls.java b/samples/ApplicationCalls.java index 89cdbad..ed4a95c 100644 --- a/samples/ApplicationCalls.java +++ b/samples/ApplicationCalls.java @@ -318,10 +318,53 @@ public static void callWithThinking() throws NoApiKeyException, InputRequiredExc Application application = new Application(); Flowable result = application.streamCall(param); result.blockingForEach(data -> System.out.printf("result: %s%n", data)); -// result.blockingForEach(data -> System.out.printf(data.getOutput().getText())); System.out.print("\n"); } + /** + * Call with file list sample + * + * @throws NoApiKeyException Can not find api key + * @throws ApiException The request failed, possibly + * due to a network or data error. + * @throws InputRequiredException Missing inputs. + */ + public static void callWithFileList() + throws ApiException, NoApiKeyException, InputRequiredException { + ApplicationParam param = ApplicationParam.builder() + .appId(APP_ID) + .prompt("总结文件内容") + .files(Collections.singletonList( + "https://dashscope.oss-cn-beijing.aliyuncs.com/audios/welcome.mp3")) + .build(); + + Application application = new Application(); + ApplicationResult result = application.call(param); + + System.out.println(JsonUtils.toJson(result)); + } + + /** + * Stream call with file list sample + * + * @throws NoApiKeyException Can not find api key + * @throws InputRequiredException Missing inputs. + */ + public static void streamCallWithFileList() + throws NoApiKeyException, InputRequiredException { + ApplicationParam param = ApplicationParam.builder() + .appId(APP_ID) + .prompt("总结文件内容") + .files(Collections.singletonList( + "https://dashscope.oss-cn-beijing.aliyuncs.com/audios/welcome.mp3")) + .incrementalOutput(true) + .build(); + + Application application = new Application(); + Flowable result = application.streamCall(param); + result.blockingForEach(data -> System.out.println(JsonUtils.toJson(data))); + } + public static void main(String[] args) { try { @@ -335,7 +378,9 @@ public static void main(String[] args) { // callWithAssistantServing(); // ragCallWithDocReference(); // callWithMoreParameters(); - callWithThinking(); +// callWithThinking(); + callWithFileList(); +// streamCallWithFileList(); } catch (ApiException | NoApiKeyException | InputRequiredException e) { System.out.printf("Exception: %s", e.getMessage()); } diff --git a/src/main/java/com/alibaba/dashscope/app/AppKeywords.java b/src/main/java/com/alibaba/dashscope/app/AppKeywords.java index 298fd12..96792f4 100644 --- a/src/main/java/com/alibaba/dashscope/app/AppKeywords.java +++ b/src/main/java/com/alibaba/dashscope/app/AppKeywords.java @@ -36,6 +36,8 @@ public interface AppKeywords { String IMAGES = "image_list"; + String FILE_LIST = "file_list"; + String MCP_SERVERS = "mcp_servers"; String ENABLE_WEB_SEARCH = "enable_web_search"; diff --git a/src/main/java/com/alibaba/dashscope/app/ApplicationOutput.java b/src/main/java/com/alibaba/dashscope/app/ApplicationOutput.java index c191ad4..ad3f9df 100644 --- a/src/main/java/com/alibaba/dashscope/app/ApplicationOutput.java +++ b/src/main/java/com/alibaba/dashscope/app/ApplicationOutput.java @@ -31,6 +31,10 @@ public class ApplicationOutput { @SerializedName("session_id") private String sessionId; + /** Reject status indicates whether the response was rejected */ + @SerializedName("reject_status") + private Boolean rejectStatus; + /** Thoughts of model planning for app */ @SerializedName("thoughts") private List thoughts; diff --git a/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java b/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java index 6ccdb07..73a277f 100644 --- a/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java +++ b/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java @@ -101,6 +101,9 @@ public class ApplicationParam extends HalfDuplexParamBase { /** image list */ private List images; + /** file list */ + private List files; + /** rag options */ private RagOptions ragOptions; @@ -267,6 +270,11 @@ public JsonObject getInput() { input.add(AppKeywords.IMAGES, imagesJson); } + if (files != null && !files.isEmpty()) { + JsonArray fileListJson = JsonUtils.toJsonElement(files).getAsJsonArray(); + input.add(AppKeywords.FILE_LIST, fileListJson); + } + return input; } From 197170dfc383a74335e36581cd3ce6283bc32685 Mon Sep 17 00:00:00 2001 From: "mose-x.zm" Date: Mon, 20 Oct 2025 11:30:11 +0800 Subject: [PATCH 02/64] Add input.audio_url to support wan2.5 video generation --- samples/VideoSynthesisUsage.java | 8 +++----- .../aigc/videosynthesis/VideoSynthesisParam.java | 13 +++++++++++++ .../com/alibaba/dashscope/utils/ApiKeywords.java | 2 ++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/samples/VideoSynthesisUsage.java b/samples/VideoSynthesisUsage.java index 2273350..eade460 100644 --- a/samples/VideoSynthesisUsage.java +++ b/samples/VideoSynthesisUsage.java @@ -17,11 +17,9 @@ public static void basicCall() throws ApiException, NoApiKeyException, InputRequ VideoSynthesis vs = new VideoSynthesis(); VideoSynthesisParam param = VideoSynthesisParam.builder() - .model(VideoSynthesis.Models.WANX_2_1_I2V_TURBO) - // prompt not required - // .prompt("一只戴着绿色眼镜的小狗") -// .imgUrl("https://modelscope.oss-cn-beijing.aliyuncs.com/resource/dog.jpeg") - .imgUrl("file:///Users/xxx/Documents/source/dog.jpeg") + .model("wan2.5-t2v-preview") + .prompt("一只戴着绿色眼镜的小狗在唱rap") + .audioUrl("https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250925/ozwpvi/rap.mp3") .build(); VideoSynthesisResult result = vs.call(param); System.out.println(result); diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java index 754b828..d05a9b5 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java @@ -52,6 +52,9 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { /** The input image url, Generate the URL of the image referenced by the video */ @Builder.Default private String imgUrl = null; + /** The input audio url. */ + @Builder.Default private String audioUrl = null; + /** The extra parameters. */ @GsonExclude @Singular protected Map extraInputs; @@ -77,6 +80,8 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { @Builder.Default private Boolean watermark = null; + @Builder.Default private Boolean audio = null; + /** The inputs of the model. */ @Override public JsonObject getInput() { @@ -99,6 +104,9 @@ public JsonObject getInput() { if (imgUrl != null && !imgUrl.isEmpty()) { jsonObject.addProperty(IMG_URL, imgUrl); } + if (audioUrl != null && !audioUrl.isEmpty()) { + jsonObject.addProperty(AUDIO_URL, audioUrl); + } if (firstFrameUrl != null && !firstFrameUrl.isEmpty()) { jsonObject.addProperty(FIRST_FRAME_URL, firstFrameUrl); @@ -153,6 +161,9 @@ public Map getParameters() { if (watermark != null) { params.put(WATERMARK, watermark); } + if (audio != null) { + params.put(AUDIO, audio); + } params.putAll(super.getParameters()); return params; } @@ -184,6 +195,7 @@ public void validate() throws InputRequiredException {} public void checkAndUpload() throws NoApiKeyException, UploadFileException { Map inputChecks = new HashMap<>(); inputChecks.put(IMG_URL, this.imgUrl); + inputChecks.put(AUDIO_URL, this.audioUrl); inputChecks.put(FIRST_FRAME_URL, this.firstFrameUrl); inputChecks.put(LAST_FRAME_URL, this.lastFrameUrl); inputChecks.put(HEAD_FRAME, this.headFrame); @@ -195,6 +207,7 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { this.putHeader("X-DashScope-OssResourceResolve", "enable"); this.imgUrl = inputChecks.get(IMG_URL); + this.audioUrl = inputChecks.get(AUDIO_URL); this.firstFrameUrl = inputChecks.get(FIRST_FRAME_URL); this.lastFrameUrl = inputChecks.get(LAST_FRAME_URL); this.headFrame = inputChecks.get(HEAD_FRAME); diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index 2ee6e94..936fc63 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -150,6 +150,8 @@ public class ApiKeywords { public static final String IMG_URL = "img_url"; + public static final String AUDIO_URL = "audio_url"; + public static final String REF_IMG = "ref_img"; public static final String MODALITIES = "modalities"; From 360c1ff04e0957ce24dae1c50ebd9554c92d72de Mon Sep 17 00:00:00 2001 From: "mose-x.zm" Date: Tue, 21 Oct 2025 14:08:45 +0800 Subject: [PATCH 03/64] support wan2.5 i2i --- .../aigc/imagesynthesis/ImageSynthesis.java | 4 ++-- .../imagesynthesis/ImageSynthesisParam.java | 17 +++++++++++++++++ .../alibaba/dashscope/utils/ApiKeywords.java | 2 ++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesis.java b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesis.java index 0e2d0d7..22aba38 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesis.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesis.java @@ -119,7 +119,7 @@ public ImageSynthesisResult asyncCall(ImageSynthesisParam param) throw new ApiException(e); } ApiServiceOption serviceOption = createServiceOptions; - if (param.getModel().contains("imageedit")) { + if (param.getModel().contains("imageedit") || param.getModel().contains("wan2.5-i2i")) { serviceOption.setTask("image2image"); } return ImageSynthesisResult.fromDashScopeResult( @@ -161,7 +161,7 @@ public ImageSynthesisResult call(ImageSynthesisParam param) throw new ApiException(e); } ApiServiceOption serviceOption = createServiceOptions; - if (param.getModel().contains("imageedit")) { + if (param.getModel().contains("imageedit") || param.getModel().contains("wan2.5-i2i")) { serviceOption.setTask("image2image"); } return ImageSynthesisResult.fromDashScopeResult( diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java index ed68732..1ac8e20 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java @@ -13,7 +13,9 @@ import com.alibaba.dashscope.utils.PreprocessInputImage; import com.google.gson.JsonObject; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import lombok.Builder; import lombok.Data; @@ -35,6 +37,7 @@ public class ImageSynthesisParam extends HalfDuplexServiceParam { private String negativePrompt; private String refImage; private String sketchImageUrl; + private List images; /** The specific functions to be achieved , see class ImageEditFunction */ @Builder.Default private String function = null; @@ -78,6 +81,9 @@ public JsonObject getInput() { if (maskImageUrl != null && !maskImageUrl.isEmpty()) { jsonObject.addProperty(MASK_IMAGE_URL, maskImageUrl); } + if (images != null && !images.isEmpty()) { + jsonObject.add(IMAGES, JsonUtils.toJsonArray(images)); + } if (extraInputs != null && !extraInputs.isEmpty()) { JsonObject extraInputsJsonObject = JsonUtils.parametersToJsonObject(extraInputs); jsonObject = JsonUtils.merge(jsonObject, extraInputsJsonObject); @@ -147,6 +153,10 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { inputChecks.put(SKETCH_IMAGE_URL, this.sketchImageUrl); inputChecks.put(BASE_IMAGE_URL, this.baseImageUrl); inputChecks.put(MASK_IMAGE_URL, this.maskImageUrl); + int imagesSize = this.images.size(); + for (int i = 0; i < imagesSize; i++) { + inputChecks.put(IMAGES + "[" + i + "]", this.images.get(i)); + } boolean isUpload = PreprocessInputImage.checkAndUploadImage(getModel(), inputChecks, getApiKey()); @@ -157,6 +167,13 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { this.sketchImageUrl = inputChecks.get(SKETCH_IMAGE_URL); this.baseImageUrl = inputChecks.get(BASE_IMAGE_URL); this.maskImageUrl = inputChecks.get(MASK_IMAGE_URL); + List newImages = new ArrayList<>(); + for (int i = 0; i < imagesSize; i++) { + newImages.add(inputChecks.get(IMAGES + "[" + i + "]")); + } + if (!newImages.isEmpty()) { + this.images = newImages; + } } } } diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index 936fc63..2891699 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -185,4 +185,6 @@ public class ApiKeywords { public static final String ANNOTATIONS = "annotations"; public static final String LANGUAGE_TYPE = "language_type"; + + public static final String IMAGES = "images"; } From 9b72b7d710aaa3b512e0ed7b7757d9d929bfea46 Mon Sep 17 00:00:00 2001 From: "mose-x.zm" Date: Tue, 21 Oct 2025 15:01:44 +0800 Subject: [PATCH 04/64] fix wan2.5 i2i --- .../imagesynthesis/ImageSynthesisParam.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java index 1ac8e20..71e6226 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java @@ -153,9 +153,12 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { inputChecks.put(SKETCH_IMAGE_URL, this.sketchImageUrl); inputChecks.put(BASE_IMAGE_URL, this.baseImageUrl); inputChecks.put(MASK_IMAGE_URL, this.maskImageUrl); - int imagesSize = this.images.size(); - for (int i = 0; i < imagesSize; i++) { - inputChecks.put(IMAGES + "[" + i + "]", this.images.get(i)); + int imagesSize = 0; + if (this.images != null) { + imagesSize = this.images.size(); + for (int i = 0; i < imagesSize; i++) { + inputChecks.put(IMAGES + "[" + i + "]", this.images.get(i)); + } } boolean isUpload = PreprocessInputImage.checkAndUploadImage(getModel(), inputChecks, getApiKey()); @@ -167,12 +170,10 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { this.sketchImageUrl = inputChecks.get(SKETCH_IMAGE_URL); this.baseImageUrl = inputChecks.get(BASE_IMAGE_URL); this.maskImageUrl = inputChecks.get(MASK_IMAGE_URL); - List newImages = new ArrayList<>(); - for (int i = 0; i < imagesSize; i++) { - newImages.add(inputChecks.get(IMAGES + "[" + i + "]")); - } - if (!newImages.isEmpty()) { - this.images = newImages; + if (imagesSize > 0) { + for (int i = 0; i < imagesSize; i++) { + this.images.add(inputChecks.get(IMAGES + "[" + i + "]")); + } } } } From f499a6dd81321a69e1e080fca8668e97e4d22fe6 Mon Sep 17 00:00:00 2001 From: "mose-x.zm" Date: Tue, 21 Oct 2025 15:10:59 +0800 Subject: [PATCH 05/64] fix wan2.5 i2i --- .../dashscope/aigc/imagesynthesis/ImageSynthesisParam.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java index 71e6226..09b8e5d 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java @@ -171,9 +171,11 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { this.baseImageUrl = inputChecks.get(BASE_IMAGE_URL); this.maskImageUrl = inputChecks.get(MASK_IMAGE_URL); if (imagesSize > 0) { + List newImages = new ArrayList<>(); for (int i = 0; i < imagesSize; i++) { - this.images.add(inputChecks.get(IMAGES + "[" + i + "]")); + newImages.add(inputChecks.get(IMAGES + "[" + i + "]")); } + this.images = newImages; } } } From ac58590bc2053eeb2262b1ad54cb6a29a00438b5 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Tue, 21 Oct 2025 19:10:27 +0800 Subject: [PATCH 06/64] release version 2.21.13 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0ceb376..0e14f16 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.21.12 + 2.21.13 8 From 0c4c10e69e3b84d3d6d3de24208c26bc03995470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Fri, 26 Sep 2025 16:11:53 +0800 Subject: [PATCH 07/64] feat:app/multimodal-dialog: add upstream.asr_post_processing --- .../multimodal/MultiModalDialog.java | 13 ++- .../MultiModalDialogApiKeyWords.java | 7 ++ .../multimodal/MultiModalRequestParam.java | 82 ++++++++++++++++--- 3 files changed, 89 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java index 8dbd950..34aa6bd 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java @@ -1,5 +1,6 @@ package com.alibaba.dashscope.multimodal; +import com.alibaba.dashscope.Version; import com.alibaba.dashscope.api.SynchronizeFullDuplexApi; import com.alibaba.dashscope.common.*; import com.alibaba.dashscope.exception.ApiException; @@ -8,6 +9,7 @@ import com.alibaba.dashscope.protocol.ApiServiceOption; import com.alibaba.dashscope.protocol.Protocol; import com.alibaba.dashscope.protocol.StreamingMode; +import com.alibaba.dashscope.utils.Constants; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; import io.reactivex.BackpressureStrategy; @@ -95,7 +97,8 @@ public Flowable getStreamingData() { public static MultiModalRequestParamWithStream FromMultiModalParam( MultiModalRequestParam param, Flowable dataStream, String preRequestId) { - + ClientInfo clientInfo = param.getClientInfo(); + clientInfo.setSdk("dashscope-sdk-java "+ Version.version); return MultiModalRequestParamWithStream.builder() .parameter("pre_task_id", preRequestId) .headers(param.getHeaders()) @@ -103,7 +106,7 @@ public static MultiModalRequestParamWithStream FromMultiModalParam( .customInput(param.getCustomInput()) .bizParams(param.getBizParams()) .downStream(param.getDownStream()) - .clientInfo(param.getClientInfo()) + .clientInfo(clientInfo) .dialogAttributes(param.getDialogAttributes()) .images(param.getImages()) .dataStream(dataStream) @@ -402,6 +405,12 @@ public void updateInfo(MultiModalRequestParam.UpdateParams updateParams) { if (updateParams != null && updateParams.images != null) { requestParamWithStream.setImages(updateParams.images); } + if (updateParams != null && updateParams.upStream != null) { + requestParamWithStream.setUpStream(updateParams.upStream); + } + if (updateParams != null && updateParams.downStream != null) { + requestParamWithStream.setDownStream(updateParams.downStream); + } sendTextFrame("UpdateInfo"); } diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java index cdc3b4b..a559f6c 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java @@ -17,6 +17,12 @@ public class MultiModalDialogApiKeyWords { public static String CONST_NAME_UP_STREAM_AUDIO_FORMAT = "audio_format"; public static String CONST_NAME_UP_STREAM_TYPE = "type"; public static String CONST_NAME_UP_STREAM_MODE = "mode"; + public static String CONST_NAME_REPLACE_WORD_SOURCE = "source"; + public static String CONST_NAME_REPLACE_WORD_TARGET = "target"; + public static String CONST_NAME_REPLACE_WORD_MATCH_MODE = "match_mode"; + public static String CONST_NAME_REPLACE_WORDS = "replace_words"; + public static String CONST_NAME_ASR_POST_PROCESSING = "asr_post_processing"; + public static String CONST_NAME_VOCABULARY_ID = "vocabulary_id"; public static String CONST_NAME_DOWN_STREAM_VOICE = "voice"; public static String CONST_NAME_DOWN_STREAM_SAMPLE_RATE = "sample_rate"; @@ -43,6 +49,7 @@ public class MultiModalDialogApiKeyWords { public static String CONST_NAME_CLIENT_INFO_LOCATION_LONGITUDE = "longitude"; public static String CONST_NAME_CLIENT_INFO_LOCATION_CITY_NAME = "city_name"; public static String CONST_NAME_CLIENT_INFO_ACTIVE_FOREGROUND_APP = "active_foreground_app"; + public static String CONST_NAME_CLIENT_INFO_SDK = "sdk"; public static String CONST_NAME_BIZ_PARAMS_USER_DEFINED_PARAMS = "user_defined_params"; public static String CONST_NAME_BIZ_PARAMS_USER_DEFINED_TOKENS = "user_defined_tokens"; diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java index 5e6b1b4..09128fd 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java @@ -7,12 +7,11 @@ import com.alibaba.dashscope.base.FullDuplexServiceParam; import io.reactivex.Flowable; -import lombok.Builder; -import lombok.Data; -import lombok.EqualsAndHashCode; +import lombok.*; import lombok.experimental.SuperBuilder; -import lombok.val; +import org.jetbrains.annotations.NotNull; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -58,9 +57,25 @@ public Map getInputs() { public static class UpStream { private String type = "AudioOnly"; private String mode; - // private int sampleRate; + private AsrPostProcessing asrPostProcessing; + @Builder.Default private Integer sampleRate = 16000; + private String vocabularyId = null; @Builder.Default private String audioFormat = CONST_AUDIO_FORMAT_PCM; //support pcm/opus private Map passThroughParams; + + @Builder + @Setter + public static class AsrPostProcessing { + private List replaceWords; + + @Builder + @Setter + public static class ReplaceWord { + private String source; + private String target; + private String matchMode; + } + } } @Builder @@ -83,6 +98,7 @@ public static class DialogAttributes { } @Builder + @Setter public static class ClientInfo { private String userId; private Device device; @@ -91,6 +107,7 @@ public static class ClientInfo { private Object status; private String activeForegroundApp; private Map passThroughParams; + private String sdk; @Builder public static class Network { @@ -133,6 +150,8 @@ public void clearParameters() { @Builder public static class UpdateParams { + UpStream upStream; + DownStream downStream; List images; BizParams bizParams; ClientInfo clientInfo; @@ -146,6 +165,16 @@ public Map getParameters() { upStreamParams.put(CONST_NAME_UP_STREAM_TYPE, upStream.type); upStreamParams.put(CONST_NAME_UP_STREAM_MODE, upStream.mode); upStreamParams.put(CONST_NAME_UP_STREAM_AUDIO_FORMAT, upStream.audioFormat); + if (upStream.asrPostProcessing != null){ + final var asrPostProcessingParams = getUpstreamAsrPostProcessing(); + if (!asrPostProcessingParams.isEmpty()) { + upStreamParams.put(CONST_NAME_ASR_POST_PROCESSING, asrPostProcessingParams); + } + } + upStreamParams.put(CONST_NAME_DOWN_STREAM_SAMPLE_RATE, upStream.sampleRate); + if (upStream.vocabularyId != null) { + upStreamParams.put(CONST_NAME_VOCABULARY_ID, upStream.vocabularyId); + } if (upStream.passThroughParams != null) { upStreamParams.putAll(upStream.passThroughParams); } @@ -199,17 +228,32 @@ public Map getParameters() { if (clientInfo.passThroughParams != null) { clientInfoParams.putAll(clientInfo.passThroughParams); } + if (clientInfo.sdk != null){ + clientInfoParams.put(CONST_NAME_CLIENT_INFO_SDK, clientInfo.sdk); + } params.put(CONST_NAME_CLIENT_INFO, clientInfoParams); } if (bizParams != null) { val bizParamsParams = new HashMap(); - bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_DEFINED_PARAMS, bizParams.userDefinedParams); - bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_DEFINED_TOKENS, bizParams.userDefinedTokens); - bizParamsParams.put(CONST_NAME_BIZ_PARAMS_TOOL_PROMPTS, bizParams.toolPrompts); - bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_QUERY_PARAMS, bizParams.userQueryParams); - bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_PROMPT_PARAMS, bizParams.userPromptParams); - bizParamsParams.put(CONST_NAME_BIZ_PARAMS_VIDEOS, bizParams.videos); + if (bizParams.userDefinedParams != null) { + bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_DEFINED_PARAMS, bizParams.userDefinedParams); + } + if (bizParams.userDefinedTokens != null) { + bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_DEFINED_TOKENS, bizParams.userDefinedTokens); + } + if (bizParams.toolPrompts != null) { + bizParamsParams.put(CONST_NAME_BIZ_PARAMS_TOOL_PROMPTS, bizParams.toolPrompts); + } + if (bizParams.userQueryParams != null) { + bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_QUERY_PARAMS, bizParams.userQueryParams); + } + if (bizParams.userPromptParams != null) { + bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_PROMPT_PARAMS, bizParams.userPromptParams); + } + if (bizParams.videos != null) { + bizParamsParams.put(CONST_NAME_BIZ_PARAMS_VIDEOS, bizParams.videos); + } if (bizParams.passThroughParams != null) { bizParamsParams.putAll(bizParams.passThroughParams); } @@ -222,6 +266,22 @@ public Map getParameters() { return params; } + private @NotNull HashMap getUpstreamAsrPostProcessing() { + val asrPostProcessingParams = new HashMap(); + if (upStream.asrPostProcessing.replaceWords != null) { + val replaceWords = new ArrayList>(); + for (val replaceWord : upStream.asrPostProcessing.replaceWords) { + val replaceWordObj= new HashMap(); + replaceWordObj.put(CONST_NAME_REPLACE_WORD_SOURCE, replaceWord.source); + replaceWordObj.put(CONST_NAME_REPLACE_WORD_TARGET, replaceWord.target); + replaceWordObj.put(CONST_NAME_REPLACE_WORD_MATCH_MODE, replaceWord.matchMode); + replaceWords.add(replaceWordObj); + } + asrPostProcessingParams.put(CONST_NAME_REPLACE_WORDS, replaceWords); + } + return asrPostProcessingParams; + } + @Override public Flowable getStreamingData() { return null; From 90e2f4530f2cc27c4c170b20a99d78bf1c94f63a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Thu, 23 Oct 2025 16:13:48 +0800 Subject: [PATCH 08/64] feat(model/qwen3-livetranslate&asr-realtime):add input params --- samples/Qwen3AsrRealtimeUsage.java | 128 ++++++++++++++++++ samples/Qwen3LiveTranslateUsage.java | 125 +++++++++++++++++ .../audio/omni/OmniRealtimeConfig.java | 37 ++++- .../audio/omni/OmniRealtimeConstants.java | 8 +- .../audio/omni/OmniRealtimeConversation.java | 2 +- .../omni/OmniRealtimeTranscriptionParam.java | 45 ++++++ .../omni/OmniRealtimeTranslationParam.java | 13 ++ 7 files changed, 353 insertions(+), 5 deletions(-) create mode 100644 samples/Qwen3AsrRealtimeUsage.java create mode 100644 samples/Qwen3LiveTranslateUsage.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranscriptionParam.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java diff --git a/samples/Qwen3AsrRealtimeUsage.java b/samples/Qwen3AsrRealtimeUsage.java new file mode 100644 index 0000000..a9077dd --- /dev/null +++ b/samples/Qwen3AsrRealtimeUsage.java @@ -0,0 +1,128 @@ +import com.alibaba.dashscope.audio.omni.*; +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.google.gson.JsonObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.sound.sampled.LineUnavailableException; +import java.io.File; +import java.io.FileInputStream; +import java.util.Base64; +import java.util.Collections; +import java.util.concurrent.atomic.AtomicReference; + + +public class Qwen3AsrRealtimeUsage { + private static final Logger log = LoggerFactory.getLogger(Qwen3AsrRealtimeUsage.class); + private static final int AUDIO_CHUNK_SIZE = 1024; // Audio chunk size in bytes + private static final int SLEEP_INTERVAL_MS = 30; // Sleep interval in milliseconds + + public static void main(String[] args) throws InterruptedException, LineUnavailableException { + + OmniRealtimeParam param = OmniRealtimeParam.builder() + .model("qwen3-asr-flash-realtime") + .apikey(System.getenv("DASHSCOPE_API_KEY")) + .build(); + + OmniRealtimeConversation conversation = null; + final AtomicReference conversationRef = new AtomicReference<>(null); + conversation = new OmniRealtimeConversation(param, new OmniRealtimeCallback() { + @Override + public void onOpen() { + System.out.println("connection opened"); + } + @Override + public void onEvent(JsonObject message) { + String type = message.get("type").getAsString(); + switch(type) { + case "session.created": + System.out.println("start session: " + message.get("session").getAsJsonObject().get("id").getAsString()); + break; + case "conversation.item.input_audio_transcription.completed": + System.out.println("question: " + message.get("transcript").getAsString()); + break; + case "response.audio_transcript.delta": + System.out.println("got llm response delta: " + message.get("delta").getAsString()); + break; + case "input_audio_buffer.speech_started": + System.out.println("======VAD Speech Start======"); + break; + case "input_audio_buffer.speech_stopped": + System.out.println("======VAD Speech Stop======"); + break; + case "response.done": + System.out.println("======RESPONSE DONE======"); + if (conversationRef.get() != null) { + System.out.println("[Metric] response: " + conversationRef.get().getResponseId() + + ", first text delay: " + conversationRef.get().getFirstTextDelay() + + " ms, first audio delay: " + conversationRef.get().getFirstAudioDelay() + " ms"); + } + break; + default: + break; + } + } + @Override + public void onClose(int code, String reason) { + System.out.println("connection closed code: " + code + ", reason: " + reason); + } + }); + conversationRef.set(conversation); + try { + conversation.connect(); + } catch (NoApiKeyException e) { + throw new RuntimeException(e); + } + + + OmniRealtimeTranscriptionParam transcriptionParam = new OmniRealtimeTranscriptionParam(); + transcriptionParam.setLanguage("zh"); + transcriptionParam.setInputAudioFormat("pcm"); + transcriptionParam.setInputSampleRate(16000); + transcriptionParam.setCorpusText("这是一段脱口秀表演"); + + OmniRealtimeConfig config = OmniRealtimeConfig.builder() + .modalities(Collections.singletonList(OmniRealtimeModality.TEXT)) + .transcriptionConfig(transcriptionParam) + .build(); + conversation.updateSession(config); + + + String filePath = "./path/to/your/audio/16k-16bit-mono-file.pcm"; + File audioFile = new File(filePath); + + if (!audioFile.exists()) { + log.error("Audio file not found: {}", filePath); + return; + } + + try (FileInputStream audioInputStream = new FileInputStream(audioFile)) { + byte[] audioBuffer = new byte[AUDIO_CHUNK_SIZE]; + int bytesRead; + int totalBytesRead = 0; + + log.info("Starting to send audio data from: {}", filePath); + + // Read and send audio data in chunks + while ((bytesRead = audioInputStream.read(audioBuffer)) != -1) { + totalBytesRead += bytesRead; + String audioB64 = Base64.getEncoder().encodeToString(audioBuffer); + // Send audio chunk to conversation + conversation.appendAudio(audioB64); + + // Add small delay to simulate real-time audio streaming + Thread.sleep(SLEEP_INTERVAL_MS); + } + + log.info("Finished sending audio data. Total bytes sent: {}", totalBytesRead); + + } catch (Exception e) { + log.error("Error sending audio from file: {}", filePath, e); + } + + conversation.commit(); + conversation.createResponse(null, null); + conversation.close(1000, "bye"); + System.exit(0); + } +} diff --git a/samples/Qwen3LiveTranslateUsage.java b/samples/Qwen3LiveTranslateUsage.java new file mode 100644 index 0000000..9b3f4a5 --- /dev/null +++ b/samples/Qwen3LiveTranslateUsage.java @@ -0,0 +1,125 @@ +import com.alibaba.dashscope.audio.omni.*; +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.google.gson.JsonObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.sound.sampled.LineUnavailableException; +import java.io.File; +import java.io.FileInputStream; +import java.util.Arrays; +import java.util.Base64; +import java.util.concurrent.atomic.AtomicReference; + + +public class Qwen3LiveTranslateUsage { + private static final Logger log = LoggerFactory.getLogger(Qwen3LiveTranslateUsage.class); + private static final int AUDIO_CHUNK_SIZE = 1024; // Audio chunk size in bytes + private static final int SLEEP_INTERVAL_MS = 30; // Sleep interval in milliseconds + + public static void main(String[] args) throws InterruptedException, LineUnavailableException { + + OmniRealtimeParam param = OmniRealtimeParam.builder() + .model("qwen3-livetranslate-flash-realtime") + .apikey(System.getenv("DASHSCOPE_API_KEY")) + .build(); + + OmniRealtimeConversation conversation = null; + final AtomicReference conversationRef = new AtomicReference<>(null); + conversation = new OmniRealtimeConversation(param, new OmniRealtimeCallback() { + @Override + public void onOpen() { + System.out.println("connection opened"); + } + @Override + public void onEvent(JsonObject message) { + String type = message.get("type").getAsString(); + switch(type) { + case "session.created": + System.out.println("start session: " + message.get("session").getAsJsonObject().get("id").getAsString()); + break; + case "conversation.item.input_audio_transcription.completed": + System.out.println("question: " + message.get("transcript").getAsString()); + break; + case "response.audio_transcript.delta": + System.out.println("got llm response delta: " + message.get("delta").getAsString()); + break; + case "response.audio.delta": + String recvAudioB64 = message.get("delta").getAsString(); + // audioPlayer.write(recvAudioB64); // 音频播放,可以自行实现 + break; + case "input_audio_buffer.speech_started": + System.out.println("======VAD Speech Start======"); + break; + case "response.done": + System.out.println("======RESPONSE DONE======"); + if (conversationRef.get() != null) { + System.out.println("[Metric] response: " + conversationRef.get().getResponseId() + + ", first text delay: " + conversationRef.get().getFirstTextDelay() + + " ms, first audio delay: " + conversationRef.get().getFirstAudioDelay() + " ms"); + } + break; + default: + break; + } + } + @Override + public void onClose(int code, String reason) { + System.out.println("connection closed code: " + code + ", reason: " + reason); + } + }); + conversationRef.set(conversation); + try { + conversation.connect(); + } catch (NoApiKeyException e) { + throw new RuntimeException(e); + } + + OmniRealtimeConfig config = OmniRealtimeConfig.builder() + .modalities(Arrays.asList(OmniRealtimeModality.AUDIO, OmniRealtimeModality.TEXT)) + .voice("Cherry") + .outputAudioFormat(OmniRealtimeAudioFormat.PCM_24000HZ_MONO_16BIT) + .inputAudioFormat(OmniRealtimeAudioFormat.PCM_16000HZ_MONO_16BIT) + .translationConfig(OmniRealtimeTranslationParam.builder().language("en").build()) + .build(); + conversation.updateSession(config); + + + String filePath = "./path/to/your/audio/16k-16bit-mono-file.pcm"; + File audioFile = new File(filePath); + + if (!audioFile.exists()) { + log.error("Audio file not found: {}", filePath); + return; + } + + try (FileInputStream audioInputStream = new FileInputStream(audioFile)) { + byte[] audioBuffer = new byte[AUDIO_CHUNK_SIZE]; + int bytesRead; + int totalBytesRead = 0; + + log.info("Starting to send audio data from: {}", filePath); + + // Read and send audio data in chunks + while ((bytesRead = audioInputStream.read(audioBuffer)) != -1) { + totalBytesRead += bytesRead; + String audioB64 = Base64.getEncoder().encodeToString(audioBuffer); + // Send audio chunk to conversation + conversation.appendAudio(audioB64); + + // Add small delay to simulate real-time audio streaming + Thread.sleep(SLEEP_INTERVAL_MS); + } + + log.info("Finished sending audio data. Total bytes sent: {}", totalBytesRead); + + } catch (Exception e) { + log.error("Error sending audio from file: {}", filePath, e); + } + + conversation.commit(); + conversation.createResponse(null, null); + conversation.close(1000, "bye"); + System.exit(0); + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java index c798a62..0334048 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java @@ -19,8 +19,8 @@ public class OmniRealtimeConfig { /** omni output modalities to be used in session */ @NonNull List modalities; - /** voice to be used in session */ - @NonNull String voice; + /** voice to be used in session ,not need in qwen-asr-realtime*/ + @Builder.Default String voice = null; /** input audio format */ @Builder.Default @@ -50,6 +50,12 @@ public class OmniRealtimeConfig { @Builder.Default Map turnDetectionParam = null; /** The extra parameters. */ @Builder.Default Map parameters = null; + /** translation configuration */ + @Builder.Default + OmniRealtimeTranslationParam translationConfig = null; + /** transcription configuration */ + @Builder.Default + OmniRealtimeTranscriptionParam transcriptionConfig = null; public JsonObject getConfig() { Map config = new HashMap<>(); @@ -82,6 +88,31 @@ public JsonObject getConfig() { } else { config.put(OmniRealtimeConstants.TURN_DETECTION, null); } + // Add translation configuration to the config + if (translationConfig != null) { + Map translationConfig = new HashMap<>(); + translationConfig.put(OmniRealtimeConstants.LANGUAGE, this.translationConfig.getLanguage()); + config.put(OmniRealtimeConstants.TRANSLATION, translationConfig); + } else { + config.put(OmniRealtimeConstants.TRANSLATION, null); + } + // Add transcription configuration for qwen-asr-realtime + if (transcriptionConfig != null) { + Map transcriptionConfig = new HashMap<>(); + if (this.transcriptionConfig.getInputSampleRate() != null) { + config.put(OmniRealtimeConstants.SAMPLE_RATE, this.transcriptionConfig.getInputSampleRate()); + } + if (this.transcriptionConfig.getInputAudioFormat() != null) { + config.put(OmniRealtimeConstants.INPUT_AUDIO_FORMAT, this.transcriptionConfig.getInputAudioFormat()); + } + if (this.transcriptionConfig.getLanguage() != null) { + transcriptionConfig.put(OmniRealtimeConstants.LANGUAGE, this.transcriptionConfig.getLanguage()); + } + if (this.transcriptionConfig.getCorpus() != null) { + transcriptionConfig.put(OmniRealtimeConstants.INPUT_AUDIO_TRANSCRIPTION_CORPUS, this.transcriptionConfig.getCorpus()); + } + config.put(OmniRealtimeConstants.INPUT_AUDIO_TRANSCRIPTION, transcriptionConfig); + } if (parameters != null) { for (Map.Entry entry : parameters.entrySet()) { config.put(entry.getKey(), entry.getValue()); @@ -93,4 +124,4 @@ public JsonObject getConfig() { JsonObject jsonObject = gson.toJsonTree(config).getAsJsonObject(); return jsonObject; } -} +} \ No newline at end of file diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java index 704b8da..6b57da4 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java @@ -10,12 +10,18 @@ public class OmniRealtimeConstants { public static final String OUTPUT_AUDIO_FORMAT = "output_audio_format"; public static final String INPUT_AUDIO_TRANSCRIPTION = "input_audio_transcription"; public static final String INPUT_AUDIO_TRANSCRIPTION_MODEL = "model"; + public static final String INPUT_AUDIO_TRANSCRIPTION_CORPUS = "corpus"; public static final String TURN_DETECTION = "turn_detection"; public static final String TURN_DETECTION_TYPE = "type"; public static final String TURN_DETECTION_THRESHOLD = "threshold"; public static final String PREFIX_PADDING_MS = "prefix_padding_ms"; public static final String SILENCE_DURATION_MS = "silence_duration_ms"; + // Translation constants + public static final String TRANSLATION = "translation"; + public static final String LANGUAGE = "language"; + public static final String SAMPLE_RATE = "sample_rate"; + public static final String PROTOCOL_EVENT_ID = "event_id"; public static final String PROTOCOL_TYPE = "type"; public static final String PROTOCOL_SESSION = "session"; @@ -34,4 +40,4 @@ public class OmniRealtimeConstants { "response.audio_transcript.delta"; public static final String PROTOCOL_RESPONSE_TYPE_AUDIO_DELTA = "response.audio.delta"; public static final String PROTOCOL_RESPONSE_TYPE_RESPONSE_DONE = "response.done"; -} +} \ No newline at end of file diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java index f6663d0..0101c88 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java @@ -107,7 +107,7 @@ public void appendAudio(String audioBase64) { OmniRealtimeConstants.PROTOCOL_TYPE, OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_APPEND_AUDIO); append_request.put(OmniRealtimeConstants.PROTOCOL_AUDIO, audioBase64); - log.debug("append audio with eid: " + event_id + ", length: " + audioBase64.length()); + log.info("append audio with eid: {}, length: {}", event_id, audioBase64.length()); GsonBuilder builder = new GsonBuilder(); builder.serializeNulls(); Gson gson = builder.create(); diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranscriptionParam.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranscriptionParam.java new file mode 100644 index 0000000..ac9ab57 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranscriptionParam.java @@ -0,0 +1,45 @@ +package com.alibaba.dashscope.audio.omni; + +import lombok.Data; + +import java.util.HashMap; +import java.util.Map; + +/** @author songsong.shao */ +@Data +public class OmniRealtimeTranscriptionParam { + /** input audio sample rate*/ + private Integer inputSampleRate = null; + /** input audio format */ + private String inputAudioFormat = null; + /** input audio language */ + private String language = null; + + /** corpus for qwen-asr-realtime */ + private Map corpus = null; + + /** text content for corpus */ + private String corpusText; + + /** + * Set text in corpus to improve model recognition accuracy. + */ + public void setCorpusText(String text) { + if (corpus == null) { + corpus = new HashMap<>(); + } + this.corpusText = text; + corpus.put("text", text); + } + + /** + * Default constructor + */ + public OmniRealtimeTranscriptionParam() { + } + + public OmniRealtimeTranscriptionParam(String audioFormat, int sampleRate) { + this.inputAudioFormat = audioFormat; + this.inputSampleRate = sampleRate; + } +} \ No newline at end of file diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java new file mode 100644 index 0000000..eef4a3d --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java @@ -0,0 +1,13 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. +package com.alibaba.dashscope.audio.omni; + +import lombok.Builder; +import lombok.Data; + +/** @author songsong.shao */ +@Builder +@Data +public class OmniRealtimeTranslationParam { + /** language for translation */ + private String language; +} \ No newline at end of file From ec84d36404b0368977eb93882f2886542098e691 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Thu, 23 Oct 2025 16:15:20 +0800 Subject: [PATCH 09/64] feat(model/cosyvoice-v3):add language hints to voice clone --- .../dashscope/audio/omni/OmniRealtimeConversation.java | 2 +- .../audio/ttsv2/enrollment/VoiceEnrollmentParam.java | 8 ++++++++ .../audio/ttsv2/enrollment/VoiceEnrollmentService.java | 3 ++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java index 0101c88..b41f419 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java @@ -107,7 +107,7 @@ public void appendAudio(String audioBase64) { OmniRealtimeConstants.PROTOCOL_TYPE, OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_APPEND_AUDIO); append_request.put(OmniRealtimeConstants.PROTOCOL_AUDIO, audioBase64); - log.info("append audio with eid: {}, length: {}", event_id, audioBase64.length()); + log.debug("append audio with eid: {}, length: {}", event_id, audioBase64.length()); GsonBuilder builder = new GsonBuilder(); builder.serializeNulls(); Gson gson = builder.create(); diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java index d57177b..7d6b01c 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java @@ -3,9 +3,13 @@ import com.alibaba.dashscope.base.HalfDuplexServiceParam; import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.utils.ApiKeywords; +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonArray; import com.google.gson.JsonObject; import java.nio.ByteBuffer; import java.security.InvalidParameterException; +import java.util.List; + import lombok.Data; import lombok.EqualsAndHashCode; import lombok.experimental.SuperBuilder; @@ -19,6 +23,7 @@ public class VoiceEnrollmentParam extends HalfDuplexServiceParam { private VoiceEnrollmentOperationType operationType; private String url; private String voiceId; + private List languageHints = null; private int pageIndex; private int pageSize; @@ -44,6 +49,9 @@ public JsonObject getInput() { input.addProperty("target_model", targetModel); input.addProperty("prefix", prefix); input.addProperty("url", url); + if (languageHints != null) { + input.add("language_hints", JsonUtils.toJsonArray(languageHints)); + } break; case LIST: input.addProperty(ApiKeywords.ACTION, operationType.getValue()); diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java index 31f7fa5..18b41ff 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java @@ -142,11 +142,12 @@ public Voice createVoice( VoiceEnrollmentParam param = VoiceEnrollmentParam.builder() .operationType(VoiceEnrollmentOperationType.CREATE) - .model(this.model) + .model(customParam.getModel().isEmpty() ? this.model : customParam.getModel()) .targetModel(targetModel) .prefix(prefix) .url(url) .apiKey(apikey) + .languageHints(customParam.getLanguageHints()) .headers(customParam.getHeaders()) .resources(customParam.getResources()) .parameters(customParam.getParameters()) From 26ce40864cfa4f767ccdcd6a9d34af99103d8c35 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Thu, 23 Oct 2025 17:03:14 +0800 Subject: [PATCH 10/64] feat(http): support response codes --- samples/GenerationStreamCall.java | 30 +++++----- .../aigc/generation/GenerationResult.java | 8 +++ .../ImageSynthesisListResult.java | 10 ++++ .../imagesynthesis/ImageSynthesisResult.java | 10 ++++ .../MultiModalConversationResult.java | 10 ++++ .../VideoSynthesisListResult.java | 10 ++++ .../videosynthesis/VideoSynthesisResult.java | 10 ++++ .../dashscope/app/ApplicationResult.java | 15 +++++ .../dashscope/common/DashScopeResult.java | 56 +++++++++++++++++++ .../com/alibaba/dashscope/common/Result.java | 9 +++ .../embeddings/BatchTextEmbeddingResult.java | 10 ++++ .../embeddings/MultiModalEmbeddingResult.java | 10 ++++ .../embeddings/TextEmbeddingResult.java | 10 ++++ .../dashscope/protocol/NetworkResponse.java | 3 + .../protocol/okhttp/OkHttpHttpClient.java | 8 +++ .../dashscope/rerank/TextReRankResult.java | 10 ++++ .../dashscope/task/AsyncTaskListResult.java | 10 ++++ .../alibaba/dashscope/utils/ApiKeywords.java | 2 + 18 files changed, 217 insertions(+), 14 deletions(-) diff --git a/samples/GenerationStreamCall.java b/samples/GenerationStreamCall.java index 553e9a9..262a04c 100644 --- a/samples/GenerationStreamCall.java +++ b/samples/GenerationStreamCall.java @@ -18,13 +18,15 @@ public static void streamCall() Generation gen = new Generation(); GenerationParam param = GenerationParam.builder() .model("qwen3-max") - .prompt("就当前的海洋污染的情况,写一份限塑的倡议书提纲,需要有理有据地号召大家克制地使用塑料制品") + .prompt("你好") .topP(0.8) .incrementalOutput(false) .build(); Flowable result = gen.streamCall(param); result.blockingForEach(message -> { - System.out.println(JsonUtils.toJson(message)); + System.out.println("generation_result:"); + System.out.println(message); + System.out.println(JsonUtils.toJson(message) + "\n"); }); } @@ -32,7 +34,7 @@ public static void streamCallWithCallback() throws NoApiKeyException, ApiException, InputRequiredException,InterruptedException { Generation gen = new Generation(); GenerationParam param = GenerationParam.builder().model(Generation.Models.QWEN_PLUS) - .prompt("就当前的海洋污染的情况,写一份限塑的倡议书提纲,需要有理有据地号召大家克制地使用塑料制品").topP(0.8).build(); + .prompt("你好").topP(0.8).build(); Semaphore semaphore = new Semaphore(0); gen.streamCall(param, new ResultCallback() { @@ -86,17 +88,17 @@ public static void main(String[] args) { System.out.println(e.getMessage()); } - try { - streamCallWithCallback(); - } catch (ApiException | NoApiKeyException | InputRequiredException | InterruptedException e) { - System.out.println(e.getMessage()); - } - - try { - streamCallWithSearchOptions(); - } catch (ApiException | NoApiKeyException | InputRequiredException e) { - System.out.println(e.getMessage()); - } +// try { +// streamCallWithCallback(); +// } catch (ApiException | NoApiKeyException | InputRequiredException | InterruptedException e) { +// System.out.println(e.getMessage()); +// } +// +// try { +// streamCallWithSearchOptions(); +// } catch (ApiException | NoApiKeyException | InputRequiredException e) { +// System.out.println(e.getMessage()); +// } System.exit(0); } diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationResult.java b/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationResult.java index 1101c5d..3b82e4e 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationResult.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationResult.java @@ -4,6 +4,7 @@ import com.alibaba.dashscope.common.DashScopeResult; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; import lombok.Data; import lombok.extern.slf4j.Slf4j; @@ -13,12 +14,19 @@ public final class GenerationResult { private String requestId; private GenerationUsage usage; private GenerationOutput output; + @SerializedName("status_code") + private Integer statusCode; + private String code; + private String message; private GenerationResult() {} public static GenerationResult fromDashScopeResult(DashScopeResult dashScopeResult) { GenerationResult result = new GenerationResult(); result.setRequestId(dashScopeResult.getRequestId()); + result.setStatusCode(dashScopeResult.getStatusCode()); + result.setCode(dashScopeResult.getCode()); + result.setMessage(dashScopeResult.getMessage()); if (dashScopeResult.getUsage() != null) { result.setUsage( JsonUtils.fromJsonObject( diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisListResult.java b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisListResult.java index 63d1907..db809f1 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisListResult.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisListResult.java @@ -27,12 +27,22 @@ public class ImageSynthesisListResult { @SerializedName("page_size") private Integer pageSize; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + + private String message; + public static ImageSynthesisListResult fromDashScopeResult(DashScopeResult dashScopeResult) { if (dashScopeResult.getOutput() != null) { ImageSynthesisListResult rs = (JsonUtils.fromJsonObject( (JsonObject) dashScopeResult.getOutput(), ImageSynthesisListResult.class)); rs.requestId = dashScopeResult.getRequestId(); + rs.statusCode = dashScopeResult.getStatusCode(); + rs.code = dashScopeResult.getCode(); + rs.message = dashScopeResult.getMessage(); return rs; } else { log.error(String.format("Result no output: %s", dashScopeResult)); diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisResult.java b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisResult.java index 2a919fc..4953f77 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisResult.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisResult.java @@ -17,11 +17,21 @@ public class ImageSynthesisResult { private ImageSynthesisOutput output; private ImageSynthesisUsage usage; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + + private String message; + private ImageSynthesisResult() {} public static ImageSynthesisResult fromDashScopeResult(DashScopeResult dashScopeResult) { ImageSynthesisResult result = new ImageSynthesisResult(); result.requestId = dashScopeResult.getRequestId(); + result.statusCode = dashScopeResult.getStatusCode(); + result.code = dashScopeResult.getCode(); + result.message = dashScopeResult.getMessage(); if (dashScopeResult.getUsage() != null) { result.setUsage( JsonUtils.fromJsonObject( diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationResult.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationResult.java index 5ad7186..03369e5 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationResult.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationResult.java @@ -3,6 +3,7 @@ import com.alibaba.dashscope.common.DashScopeResult; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; import lombok.Data; import lombok.extern.slf4j.Slf4j; @@ -13,11 +14,20 @@ public class MultiModalConversationResult { private MultiModalConversationUsage usage; private MultiModalConversationOutput output; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + private String message; + private MultiModalConversationResult() {} public static MultiModalConversationResult fromDashScopeResult(DashScopeResult dashScopeResult) { MultiModalConversationResult result = new MultiModalConversationResult(); result.setRequestId(dashScopeResult.getRequestId()); + result.setStatusCode(dashScopeResult.getStatusCode()); + result.setCode(dashScopeResult.getCode()); + result.setMessage(dashScopeResult.getMessage()); if (dashScopeResult.getUsage() != null) { result.setUsage( JsonUtils.fromJsonObject( diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisListResult.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisListResult.java index cadbe14..c5ecbc2 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisListResult.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisListResult.java @@ -27,12 +27,22 @@ public class VideoSynthesisListResult { @SerializedName("page_size") private Integer pageSize; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + + private String message; + public static VideoSynthesisListResult fromDashScopeResult(DashScopeResult dashScopeResult) { if (dashScopeResult.getOutput() != null) { VideoSynthesisListResult rs = (JsonUtils.fromJsonObject( (JsonObject) dashScopeResult.getOutput(), VideoSynthesisListResult.class)); rs.requestId = dashScopeResult.getRequestId(); + rs.statusCode = dashScopeResult.getStatusCode(); + rs.code = dashScopeResult.getCode(); + rs.message = dashScopeResult.getMessage(); return rs; } else { log.error("Result no output: {}", dashScopeResult); diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisResult.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisResult.java index 5c5d436..740efb1 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisResult.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisResult.java @@ -17,11 +17,21 @@ public class VideoSynthesisResult { private VideoSynthesisOutput output; private VideoSynthesisUsage usage; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + + private String message; + private VideoSynthesisResult() {} public static VideoSynthesisResult fromDashScopeResult(DashScopeResult dashScopeResult) { VideoSynthesisResult result = new VideoSynthesisResult(); result.requestId = dashScopeResult.getRequestId(); + result.statusCode = dashScopeResult.getStatusCode(); + result.code = dashScopeResult.getCode(); + result.message = dashScopeResult.getMessage(); if (dashScopeResult.getUsage() != null) { result.setUsage( JsonUtils.fromJsonObject( diff --git a/src/main/java/com/alibaba/dashscope/app/ApplicationResult.java b/src/main/java/com/alibaba/dashscope/app/ApplicationResult.java index fcffe4e..9cb8d98 100644 --- a/src/main/java/com/alibaba/dashscope/app/ApplicationResult.java +++ b/src/main/java/com/alibaba/dashscope/app/ApplicationResult.java @@ -32,9 +32,24 @@ public class ApplicationResult { @SerializedName("usage") private ApplicationUsage usage; + /** HTTP status code from server response */ + @SerializedName("status_code") + private Integer statusCode; + + /** Error code from server response */ + @SerializedName("code") + private String code; + + /** Message from server response */ + @SerializedName("message") + private String message; + public static ApplicationResult fromDashScopeResult(DashScopeResult dashScopeResult) { ApplicationResult result = new ApplicationResult(); result.setRequestId(dashScopeResult.getRequestId()); + result.setStatusCode(dashScopeResult.getStatusCode()); + result.setCode(dashScopeResult.getCode()); + result.setMessage(dashScopeResult.getMessage()); if (dashScopeResult.getUsage() != null) { result.setUsage( JsonUtils.fromJsonObject( diff --git a/src/main/java/com/alibaba/dashscope/common/DashScopeResult.java b/src/main/java/com/alibaba/dashscope/common/DashScopeResult.java index 27089e9..931d50d 100644 --- a/src/main/java/com/alibaba/dashscope/common/DashScopeResult.java +++ b/src/main/java/com/alibaba/dashscope/common/DashScopeResult.java @@ -57,6 +57,10 @@ protected T fromResponse(Protocol protocol, NetworkResponse r } } else { JsonObject jsonObject = JsonUtils.parse(response.getMessage()); + // Set HTTP status code if available + if (response.getHttpStatusCode() != null) { + this.setStatusCode(response.getHttpStatusCode()); + } if (jsonObject.has(ApiKeywords.OUTPUT)) { this.output = jsonObject.get(ApiKeywords.OUTPUT).isJsonNull() @@ -72,6 +76,30 @@ protected T fromResponse(Protocol protocol, NetworkResponse r if (jsonObject.has(ApiKeywords.REQUEST_ID)) { this.setRequestId(jsonObject.get(ApiKeywords.REQUEST_ID).getAsString()); } + if (jsonObject.has(ApiKeywords.STATUS_CODE)) { + this.setStatusCode( + jsonObject.get(ApiKeywords.STATUS_CODE).isJsonNull() + ? null + : jsonObject.get(ApiKeywords.STATUS_CODE).getAsInt()); + } + if (jsonObject.has(ApiKeywords.CODE)) { + this.setCode( + jsonObject.get(ApiKeywords.CODE).isJsonNull() + ? "" + : jsonObject.get(ApiKeywords.CODE).getAsString()); + } else { + // Set default empty string for successful responses + this.setCode(""); + } + if (jsonObject.has(ApiKeywords.MESSAGE)) { + this.setMessage( + jsonObject.get(ApiKeywords.MESSAGE).isJsonNull() + ? "" + : jsonObject.get(ApiKeywords.MESSAGE).getAsString()); + } else { + // Set default empty string for successful responses + this.setMessage(""); + } if (jsonObject.has(ApiKeywords.DATA)) { if (jsonObject.has(ApiKeywords.REQUEST_ID)) { jsonObject.remove(ApiKeywords.REQUEST_ID); @@ -116,6 +144,10 @@ public T fromResponse( if ((response.getHeaders().containsKey("X-DashScope-OutputEncrypted".toLowerCase()) || req.isEncryptRequest()) && protocol == Protocol.HTTP) { + // Set HTTP status code if available + if (response.getHttpStatusCode() != null) { + this.setStatusCode(response.getHttpStatusCode()); + } JsonObject jsonObject = JsonUtils.parse(response.getMessage()); String encryptedOutput = jsonObject.get(ApiKeywords.OUTPUT).isJsonNull() @@ -140,6 +172,30 @@ public T fromResponse( if (jsonObject.has(ApiKeywords.REQUEST_ID)) { this.setRequestId(jsonObject.get(ApiKeywords.REQUEST_ID).getAsString()); } + if (jsonObject.has(ApiKeywords.STATUS_CODE)) { + this.setStatusCode( + jsonObject.get(ApiKeywords.STATUS_CODE).isJsonNull() + ? null + : jsonObject.get(ApiKeywords.STATUS_CODE).getAsInt()); + } + if (jsonObject.has(ApiKeywords.CODE)) { + this.setCode( + jsonObject.get(ApiKeywords.CODE).isJsonNull() + ? "" + : jsonObject.get(ApiKeywords.CODE).getAsString()); + } else { + // Set default empty string for successful responses + this.setCode(""); + } + if (jsonObject.has(ApiKeywords.MESSAGE)) { + this.setMessage( + jsonObject.get(ApiKeywords.MESSAGE).isJsonNull() + ? "" + : jsonObject.get(ApiKeywords.MESSAGE).getAsString()); + } else { + // Set default empty string for successful responses + this.setMessage(""); + } if (jsonObject.has(ApiKeywords.DATA)) { if (jsonObject.has(ApiKeywords.REQUEST_ID)) { jsonObject.remove(ApiKeywords.REQUEST_ID); diff --git a/src/main/java/com/alibaba/dashscope/common/Result.java b/src/main/java/com/alibaba/dashscope/common/Result.java index fdf5e0c..0eaf26a 100644 --- a/src/main/java/com/alibaba/dashscope/common/Result.java +++ b/src/main/java/com/alibaba/dashscope/common/Result.java @@ -26,6 +26,15 @@ public abstract class Result { /** The headers of response */ private Map headers; + /** The HTTP status code from server response */ + private Integer statusCode; + + /** The error code from server response */ + private String code; + + /** The message from server response */ + private String message; + /** * Load data from the server output. * diff --git a/src/main/java/com/alibaba/dashscope/embeddings/BatchTextEmbeddingResult.java b/src/main/java/com/alibaba/dashscope/embeddings/BatchTextEmbeddingResult.java index ec780bb..b35a4df 100644 --- a/src/main/java/com/alibaba/dashscope/embeddings/BatchTextEmbeddingResult.java +++ b/src/main/java/com/alibaba/dashscope/embeddings/BatchTextEmbeddingResult.java @@ -16,6 +16,13 @@ public class BatchTextEmbeddingResult { private BatchTextEmbeddingOutput output; private BatchTextEmbeddingUsage usage; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + + private String message; + private BatchTextEmbeddingResult() {} public static BatchTextEmbeddingResult fromDashScopeResult(DashScopeResult dashScopeResult) { @@ -25,6 +32,9 @@ public static BatchTextEmbeddingResult fromDashScopeResult(DashScopeResult dashS (JsonObject) dashScopeResult.getOutput(), BatchTextEmbeddingOutput.class); res.usage = JsonUtils.fromJson(dashScopeResult.getUsage(), BatchTextEmbeddingUsage.class); res.requestId = dashScopeResult.getRequestId(); + res.statusCode = dashScopeResult.getStatusCode(); + res.code = dashScopeResult.getCode(); + res.message = dashScopeResult.getMessage(); return res; } } diff --git a/src/main/java/com/alibaba/dashscope/embeddings/MultiModalEmbeddingResult.java b/src/main/java/com/alibaba/dashscope/embeddings/MultiModalEmbeddingResult.java index 53a6598..53a0b4d 100644 --- a/src/main/java/com/alibaba/dashscope/embeddings/MultiModalEmbeddingResult.java +++ b/src/main/java/com/alibaba/dashscope/embeddings/MultiModalEmbeddingResult.java @@ -5,6 +5,7 @@ import com.alibaba.dashscope.common.DashScopeResult; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; import lombok.Data; @Data @@ -13,6 +14,12 @@ public class MultiModalEmbeddingResult { private MultiModalEmbeddingOutput output; private MultiModalEmbeddingUsage usage; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + private String message; + private MultiModalEmbeddingResult() {} public static MultiModalEmbeddingResult fromDashScopeResult(DashScopeResult dashScopeResult) { @@ -23,6 +30,9 @@ public static MultiModalEmbeddingResult fromDashScopeResult(DashScopeResult dash res.usage = JsonUtils.fromJson((JsonObject) dashScopeResult.getUsage(), MultiModalEmbeddingUsage.class); res.requestId = dashScopeResult.getRequestId(); + res.statusCode = dashScopeResult.getStatusCode(); + res.code = dashScopeResult.getCode(); + res.message = dashScopeResult.getMessage(); return res; } } diff --git a/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingResult.java b/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingResult.java index 9b5d62d..44a0271 100644 --- a/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingResult.java +++ b/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingResult.java @@ -5,6 +5,7 @@ import com.alibaba.dashscope.common.DashScopeResult; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; import lombok.Data; @Data @@ -13,6 +14,12 @@ public class TextEmbeddingResult { private TextEmbeddingOutput output; private TextEmbeddingUsage usage; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + private String message; + private TextEmbeddingResult() {} public static TextEmbeddingResult fromDashScopeResult(DashScopeResult dashScopeResult) { @@ -21,6 +28,9 @@ public static TextEmbeddingResult fromDashScopeResult(DashScopeResult dashScopeR JsonUtils.fromJson((JsonObject) dashScopeResult.getOutput(), TextEmbeddingOutput.class); res.usage = JsonUtils.fromJson(dashScopeResult.getUsage(), TextEmbeddingUsage.class); res.requestId = dashScopeResult.getRequestId(); + res.statusCode = dashScopeResult.getStatusCode(); + res.code = dashScopeResult.getCode(); + res.message = dashScopeResult.getMessage(); // Type listType = new TypeToken>(){}.getType(); // res.embeddings = JsonUtils.fromJson(res.output.get("embeddings"), listType); return res; diff --git a/src/main/java/com/alibaba/dashscope/protocol/NetworkResponse.java b/src/main/java/com/alibaba/dashscope/protocol/NetworkResponse.java index 4a7bb5f..0577217 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/NetworkResponse.java +++ b/src/main/java/com/alibaba/dashscope/protocol/NetworkResponse.java @@ -24,4 +24,7 @@ public class NetworkResponse { /** The binary type response. */ private ByteBuffer binary; + + /** The HTTP status code */ + private Integer httpStatusCode; } diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpHttpClient.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpHttpClient.java index c7dcac6..1da66f0 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpHttpClient.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpHttpClient.java @@ -244,6 +244,7 @@ public DashScopeResult send(HalfDuplexRequest req) throws NoApiKeyException, Api NetworkResponse.builder() .headers(response.headers().toMultimap()) .message(response.body().string()) + .httpStatusCode(response.code()) .build(), req.getIsFlatten(), req); @@ -279,6 +280,7 @@ public void onResponse(Call call, Response response) throws IOException { NetworkResponse.builder() .headers(response.headers().toMultimap()) .message(response.body().string()) + .httpStatusCode(response.code()) .build(), req.getIsFlatten(), req)); @@ -310,6 +312,7 @@ private void handleSSEEvent( .headers(response.headers().toMultimap()) .message(data) .event(eventType) + .httpStatusCode(response.code()) .build(), isFlattenResult, req)); @@ -325,6 +328,7 @@ private void handleSSEEvent( .headers(response.headers().toMultimap()) .message(data) .event(eventType) + .httpStatusCode(response.code()) .build(), isFlattenResult, req)); @@ -340,6 +344,7 @@ private void handleSSEEvent( NetworkResponse.builder() .headers(response.headers().toMultimap()) .message(data) + .httpStatusCode(response.code()) .build(), isFlattenResult, req)); @@ -433,6 +438,7 @@ public void onEvent( .headers(response.headers().toMultimap()) .message(data) .event(type) + .httpStatusCode(response.code()) .build(), req.getIsFlatten(), req)); @@ -445,6 +451,7 @@ public void onEvent( .headers(response.headers().toMultimap()) .message(data) .event(type) + .httpStatusCode(response.code()) .build(), req.getIsFlatten(), req)); @@ -456,6 +463,7 @@ public void onEvent( NetworkResponse.builder() .headers(response.headers().toMultimap()) .message(data) + .httpStatusCode(response.code()) .build(), req.getIsFlatten(), req)); diff --git a/src/main/java/com/alibaba/dashscope/rerank/TextReRankResult.java b/src/main/java/com/alibaba/dashscope/rerank/TextReRankResult.java index 6688562..734f801 100644 --- a/src/main/java/com/alibaba/dashscope/rerank/TextReRankResult.java +++ b/src/main/java/com/alibaba/dashscope/rerank/TextReRankResult.java @@ -18,11 +18,21 @@ public class TextReRankResult { private TextReRankOutput output; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + + private String message; + private TextReRankResult() {} public static TextReRankResult fromDashScopeResult(DashScopeResult dashScopeResult) { TextReRankResult result = new TextReRankResult(); result.setRequestId(dashScopeResult.getRequestId()); + result.setStatusCode(dashScopeResult.getStatusCode()); + result.setCode(dashScopeResult.getCode()); + result.setMessage(dashScopeResult.getMessage()); if (dashScopeResult.getUsage() != null) { result.setUsage( JsonUtils.fromJsonObject( diff --git a/src/main/java/com/alibaba/dashscope/task/AsyncTaskListResult.java b/src/main/java/com/alibaba/dashscope/task/AsyncTaskListResult.java index e0f8f20..cc829cc 100644 --- a/src/main/java/com/alibaba/dashscope/task/AsyncTaskListResult.java +++ b/src/main/java/com/alibaba/dashscope/task/AsyncTaskListResult.java @@ -27,12 +27,22 @@ public class AsyncTaskListResult { @SerializedName("page_size") private Integer pageSize; + @SerializedName("status_code") + private Integer statusCode; + + private String code; + + private String message; + public static AsyncTaskListResult fromDashScopeResult(DashScopeResult dashScopeResult) { if (dashScopeResult.getOutput() != null) { AsyncTaskListResult rs = (JsonUtils.fromJsonObject( (JsonObject) dashScopeResult.getOutput(), AsyncTaskListResult.class)); rs.requestId = dashScopeResult.getRequestId(); + rs.statusCode = dashScopeResult.getStatusCode(); + rs.code = dashScopeResult.getCode(); + rs.message = dashScopeResult.getMessage(); return rs; } else { log.error(String.format("Result no output: %s", dashScopeResult)); diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index 2891699..a0b81bb 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -9,6 +9,8 @@ public class ApiKeywords { public static final String CODE = "code"; + public static final String STATUS_CODE = "status_code"; + public static final String DATA = "data"; public static final String MESSAGE = "message"; From 1b98403fb791f9c98667c0564ce319006a8cef6f Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Mon, 27 Oct 2025 11:00:35 +0800 Subject: [PATCH 11/64] feat(ws): support response code --- .../dashscope/common/DashScopeResult.java | 28 +++++++++++++++++++ .../alibaba/dashscope/utils/ApiKeywords.java | 2 ++ 2 files changed, 30 insertions(+) diff --git a/src/main/java/com/alibaba/dashscope/common/DashScopeResult.java b/src/main/java/com/alibaba/dashscope/common/DashScopeResult.java index 931d50d..3f17fcd 100644 --- a/src/main/java/com/alibaba/dashscope/common/DashScopeResult.java +++ b/src/main/java/com/alibaba/dashscope/common/DashScopeResult.java @@ -36,6 +36,34 @@ protected T fromResponse(Protocol protocol, NetworkResponse r if (headers.has(ApiKeywords.TASKID)) { this.setRequestId(headers.get(ApiKeywords.TASKID).getAsString()); } + // Extract status_code, code and message from header + if (headers.has(ApiKeywords.STATUS_CODE)) { + this.setStatusCode( + headers.get(ApiKeywords.STATUS_CODE).isJsonNull() + ? null + : headers.get(ApiKeywords.STATUS_CODE).getAsInt()); + } else { + // Set default status code + this.setStatusCode(200); + } + if (headers.has(ApiKeywords.ERROR_CODE)) { + this.setCode( + headers.get(ApiKeywords.ERROR_CODE).isJsonNull() + ? "" + : headers.get(ApiKeywords.ERROR_CODE).getAsString()); + } else { + // Set default empty string for successful responses + this.setCode(""); + } + if (headers.has(ApiKeywords.ERROR_MESSAGE)) { + this.setMessage( + headers.get(ApiKeywords.ERROR_MESSAGE).isJsonNull() + ? "" + : headers.get(ApiKeywords.ERROR_MESSAGE).getAsString()); + } else { + // Set default empty string for successful responses + this.setMessage(""); + } } if (jsonObject.has(ApiKeywords.PAYLOAD)) { JsonObject payload = jsonObject.getAsJsonObject(ApiKeywords.PAYLOAD); diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index a0b81bb..61ca7f1 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -99,6 +99,8 @@ public class ApiKeywords { public static final String ERROR_NAME = "error_name"; + public static final String ERROR_CODE = "error_code"; + public static final String ERROR_MESSAGE = "error_message"; public static final String OUTPUT = "output"; From 789c2a7cabccac6a4ae798fe2983420677e36185 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Mon, 27 Oct 2025 16:52:48 +0800 Subject: [PATCH 12/64] release version 2.21.14 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0e14f16..c91f3cb 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.21.13 + 2.21.14 8 From 5527b2c09f2f08183e856c2e7d4fe1aa6e84b951 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Thu, 30 Oct 2025 14:01:02 +0800 Subject: [PATCH 13/64] feat(app): support cip_service_codes --- samples/ApplicationCalls.java | 40 ++++++++++++---- .../alibaba/dashscope/app/AppKeywords.java | 2 + .../dashscope/app/ApplicationParam.java | 7 +++ .../dashscope/app/CipServiceCodes.java | 46 +++++++++++++++++++ 4 files changed, 86 insertions(+), 9 deletions(-) create mode 100644 src/main/java/com/alibaba/dashscope/app/CipServiceCodes.java diff --git a/samples/ApplicationCalls.java b/samples/ApplicationCalls.java index ed4a95c..e5208f9 100644 --- a/samples/ApplicationCalls.java +++ b/samples/ApplicationCalls.java @@ -345,24 +345,45 @@ public static void callWithFileList() } /** - * Stream call with file list sample + * Stream call with CIP (Content Integrity Protection) parameters + * sample * * @throws NoApiKeyException Can not find api key * @throws InputRequiredException Missing inputs. */ - public static void streamCallWithFileList() + public static void streamCallWithCIP() throws NoApiKeyException, InputRequiredException { + // Build CIP service codes for content security check + CipServiceCodes.Text textCheck = + CipServiceCodes.Text.builder() + .input("query_security_check") + .output("response_security_check") + .build(); + + CipServiceCodes.Image imageCheck = + CipServiceCodes.Image.builder() + .input("img_query_security_check") + .build(); + + CipServiceCodes cipServiceCodes = CipServiceCodes.builder() + .text(textCheck) + .image(imageCheck) + .build(); + ApplicationParam param = ApplicationParam.builder() .appId(APP_ID) - .prompt("总结文件内容") - .files(Collections.singletonList( - "https://dashscope.oss-cn-beijing.aliyuncs.com/audios/welcome.mp3")) - .incrementalOutput(true) + .prompt("图片里是什么内容") + .images(Collections.singletonList("https://yutai007.oss-cn-beijing.aliyuncs.com/documentsForTest/image/%E7%BE%BD%E7%BB%92%E6%9C%8D-wgggc.jpg")) + .cipServiceCodes(cipServiceCodes) +// .incrementalOutput(true) .build(); Application application = new Application(); - Flowable result = application.streamCall(param); - result.blockingForEach(data -> System.out.println(JsonUtils.toJson(data))); +// Flowable result = application.streamCall(param); +// result.blockingForEach(data -> +// System.out.println(JsonUtils.toJson(data))); + ApplicationResult result = application.call(param); + System.out.println(JsonUtils.toJson(result)); } @@ -379,8 +400,9 @@ public static void main(String[] args) { // ragCallWithDocReference(); // callWithMoreParameters(); // callWithThinking(); - callWithFileList(); +// callWithFileList(); // streamCallWithFileList(); + streamCallWithCIP(); } catch (ApiException | NoApiKeyException | InputRequiredException e) { System.out.printf("Exception: %s", e.getMessage()); } diff --git a/src/main/java/com/alibaba/dashscope/app/AppKeywords.java b/src/main/java/com/alibaba/dashscope/app/AppKeywords.java index 96792f4..2be2071 100644 --- a/src/main/java/com/alibaba/dashscope/app/AppKeywords.java +++ b/src/main/java/com/alibaba/dashscope/app/AppKeywords.java @@ -53,4 +53,6 @@ public interface AppKeywords { String FLOW_STREAM_MODE = "flow_stream_mode"; String ENABLE_THINKING = "enable_thinking"; + + String CIP_SERVICE_CODES = "cip_service_codes"; } diff --git a/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java b/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java index 73a277f..f579796 100644 --- a/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java +++ b/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java @@ -147,6 +147,10 @@ public class ApplicationParam extends HalfDuplexParamBase { */ private Boolean enableThinking; + /** + * CIP service codes for content security check + */ + private CipServiceCodes cipServiceCodes; @Override public String getModel() { @@ -201,6 +205,9 @@ public Map getParameters() { if (enableThinking != null) { params.put(AppKeywords.ENABLE_THINKING, enableThinking); } + if (cipServiceCodes != null) { + params.put(AppKeywords.CIP_SERVICE_CODES, cipServiceCodes); + } params.putAll(parameters); diff --git a/src/main/java/com/alibaba/dashscope/app/CipServiceCodes.java b/src/main/java/com/alibaba/dashscope/app/CipServiceCodes.java new file mode 100644 index 0000000..e2cebff --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/app/CipServiceCodes.java @@ -0,0 +1,46 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. +package com.alibaba.dashscope.app; + +import lombok.Builder; +import lombok.Data; + +/** + * CIP service codes configuration for content security check. + * + * @since jdk8 + */ +@Data +@Builder +public class CipServiceCodes { + /** Text security check configuration */ + private Text text; + + /** Image security check configuration */ + private Image image; + + /** + * Text security check configuration. + */ + @Data + @Builder + public static class Text { + /** Input security check service code */ + private String input; + + /** Output security check service code */ + private String output; + } + + /** + * Image security check configuration. + */ + @Data + @Builder + public static class Image { + /** Input security check service code */ + private String input; + + /** Output security check service code */ + private String output; + } +} From 5b6b3cbc0ea8891cf40b50f3b063c88afe77faba Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Fri, 31 Oct 2025 17:33:05 +0800 Subject: [PATCH 14/64] release version 2.21.15 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c91f3cb..b9139af 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.21.14 + 2.21.15 8 From ba48ccd977efd2d85b42fb4c0c053db7a69828fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Tue, 28 Oct 2025 14:43:41 +0800 Subject: [PATCH 15/64] feat(model/qwen3-tts-realtime): add sample_rate/volume/speech_rate/format/bit_rate param --- .../QwenTtsRealtimeConfig.java | 43 ++++++++++++++++++- .../QwenTtsRealtimeConstants.java | 4 ++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java index 60d589b..d6463f0 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java @@ -25,6 +25,25 @@ public class QwenTtsRealtimeConfig { @Builder.Default String mode = "server_commit"; /** languageType for tts */ @Builder.Default String languageType = null; + + /** sampleRate for tts , range [8000,16000,22050,24000,44100,48000]. default is 24000 */ + @Builder.Default Integer sampleRate = null; + + /** speechRate for tts , range [0.5~2.0],default is 1.0 */ + @Builder.Default Float speechRate = null; + + /** volume for tts , range [0~100],default is 50 */ + @Builder.Default Integer volume = null; + + /** format for tts, support mp3,wav,pcm,opus,default is pcm */ + @Builder.Default String format = null; + + /** pitchRate for tts , range [0.5~2.0],default is 1.0 */ + @Builder.Default Float pitchRate = null; + + /** bitRate for tts , support 6~510,default is 128kbps. only work on format: opus/mp3 */ + @Builder.Default Integer bitRate = null; + /** The extra parameters. */ @Builder.Default Map parameters = null; @@ -32,8 +51,28 @@ public JsonObject getConfig() { Map config = new HashMap<>(); config.put(QwenTtsRealtimeConstants.VOICE, voice); config.put(QwenTtsRealtimeConstants.MODE, mode); - config.put(QwenTtsRealtimeConstants.RESPONSE_FORMAT, responseFormat.getFormat()); - config.put(QwenTtsRealtimeConstants.SAMPLE_RATE, responseFormat.getSampleRate()); + if (this.format != null) { + config.put(QwenTtsRealtimeConstants.RESPONSE_FORMAT, this.format); + } else { + config.put(QwenTtsRealtimeConstants.RESPONSE_FORMAT, responseFormat.getFormat()); + } + if (this.sampleRate != null) { + config.put(QwenTtsRealtimeConstants.SAMPLE_RATE, this.sampleRate); + } else { + config.put(QwenTtsRealtimeConstants.SAMPLE_RATE, responseFormat.getSampleRate()); + } + if (this.speechRate != null) { + config.put(QwenTtsRealtimeConstants.SPEECH_RATE, this.speechRate); + } + if (this.pitchRate != null) { + config.put(QwenTtsRealtimeConstants.PITCH_RATE, this.pitchRate); + } + if (this.volume != null) { + config.put(QwenTtsRealtimeConstants.VOLUME, this.volume); + } + if (this.bitRate != null) { + config.put(QwenTtsRealtimeConstants.BIT_RATE, this.bitRate); + } if (languageType != null) { config.put(QwenTtsRealtimeConstants.LANGUAGE_TYPE,languageType); } diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java index 2520b73..589a219 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java @@ -9,6 +9,10 @@ public class QwenTtsRealtimeConstants { public static final String MODE = "mode"; public static final String RESPONSE_FORMAT = "response_format"; public static final String SAMPLE_RATE = "sample_rate"; + public static final String SPEECH_RATE = "speech_rate"; + public static final String PITCH_RATE = "pitch_rate"; + public static final String VOLUME = "volume"; + public static final String BIT_RATE = "bit_rate"; public static final String LANGUAGE_TYPE = "language_type"; public static final String PROTOCOL_EVENT_ID = "event_id"; public static final String PROTOCOL_TYPE = "type"; From 48f2833e8e7e3d8185b7f8c10215e7e48b2d684f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=8C=AA=E9=BA=A6?= Date: Wed, 29 Oct 2025 11:52:42 +0800 Subject: [PATCH 16/64] [Add] sentence begine and pre end in recognition results --- .../audio/asr/recognition/Recognition.java | 7 +++++- .../asr/recognition/RecognitionResult.java | 7 ++++++ .../asr/recognition/timestamp/Sentence.java | 25 ++++++++++++++++--- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java index bfb3801..31d2968 100644 --- a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java +++ b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java @@ -128,7 +128,12 @@ public Flowable streamCall( item -> { return RecognitionResult.fromDashScopeResult(item); }) - .filter(item -> item != null && item.getSentence() != null && !item.isCompleteResult() && !item.getSentence().isHeartbeat()) + .filter( + item -> + item != null + && item.getSentence() != null + && !item.isCompleteResult() + && !item.getSentence().isHeartbeat()) .doOnNext( result -> { if (lastRequestId.get() == null && result.getRequestId() != null) { diff --git a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/RecognitionResult.java b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/RecognitionResult.java index 384b32f..3e72a92 100644 --- a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/RecognitionResult.java +++ b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/RecognitionResult.java @@ -24,7 +24,14 @@ public class RecognitionResult { private boolean isCompleteResult = false; + public boolean isSentenceBegin() { + return sentence.isSentenceBegin(); + } + public boolean isSentenceEnd() { + if (sentence.isSentenceEnd()) { + return true; + } return sentence.getEndTime() != null; } diff --git a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/timestamp/Sentence.java b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/timestamp/Sentence.java index dc4a086..3a721a0 100644 --- a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/timestamp/Sentence.java +++ b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/timestamp/Sentence.java @@ -6,11 +6,7 @@ import com.google.gson.JsonObject; import com.google.gson.annotations.SerializedName; import java.util.List; - -import lombok.Builder; import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.experimental.SuperBuilder; @Data public class Sentence { @@ -39,6 +35,27 @@ public class Sentence { @SerializedName("sentence_id") private Long sentenceId; + @SerializedName("vad_pre_end") + boolean vadPreEnd; + + @SerializedName("pre_end_failed") + boolean preEndFailed; + + @SerializedName("pre_end_timemillis") + Long preEndTimemillis; + + @SerializedName("pre_end_start_time") + Long preEndStartTime; + + @SerializedName("pre_end_end_time") + Long preEndEndTime; + + @SerializedName("sentence_begin") + boolean sentenceBegin; + + @SerializedName("sentence_end") + boolean sentenceEnd; + public static Sentence from(String message) { return JsonUtils.fromJson(message, Sentence.class); } From f0a2139d5803d739bfe90d2d1e54e5fa14fd6c18 Mon Sep 17 00:00:00 2001 From: longSky <66709229+loli0123456789@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:41:05 +0800 Subject: [PATCH 17/64] Standardize Authorization header format: use 'Bearer' instead of 'bearer' --- .../java/com/alibaba/dashscope/protocol/DashScopeHeaders.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java b/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java index dfe9de9..1daca1c 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java +++ b/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java @@ -24,7 +24,7 @@ public static Map buildWebSocketHeaders( String apiKey, boolean isSecurityCheck, String workspace, Map customHeaders) throws NoApiKeyException { Map headers = new HashMap<>(); - headers.put("Authorization", "bearer " + ApiKey.getApiKey(apiKey)); + headers.put("Authorization", "Bearer " + ApiKey.getApiKey(apiKey)); headers.put("user-agent", userAgent()); if (workspace != null && !workspace.isEmpty()) { headers.put("X-DashScope-WorkSpace", workspace); From c172449063551d8bd6059b241b0b887641e71dd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Wed, 5 Nov 2025 11:18:28 +0800 Subject: [PATCH 18/64] add: add new websocket client for audio models with thread pool fix --- .../multimodal/MultiModalDialog.java | 34 +++- .../dashscope/protocol/ClientProviders.java | 11 +- .../dashscope/protocol/ConnectionOptions.java | 10 ++ .../okhttp/OkHttpWebSocketClient.java | 10 +- .../okhttp/OkHttpWebSocketClientForAudio.java | 165 ++++++++++++++++++ 5 files changed, 222 insertions(+), 8 deletions(-) create mode 100644 src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java index 34aa6bd..55ce234 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java @@ -7,6 +7,7 @@ import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.protocol.ApiServiceOption; +import com.alibaba.dashscope.protocol.ConnectionOptions; import com.alibaba.dashscope.protocol.Protocol; import com.alibaba.dashscope.protocol.StreamingMode; import com.alibaba.dashscope.utils.Constants; @@ -44,6 +45,8 @@ public class MultiModalDialog { private ApiServiceOption serviceOption; // Service option configuration + private ConnectionOptions connectionOptions; + private Emitter conversationEmitter; // Message emitter private MultiModalRequestParam requestParam; // Request parameter @@ -137,7 +140,36 @@ public MultiModalDialog( this.requestParam = param; this.callback = callback; - this.duplexApi = new SynchronizeFullDuplexApi<>(serviceOption); + connectionOptions = ConnectionOptions.builder().build(); + this.connectionOptions.setUseDefaultClient(false); + this.duplexApi = new SynchronizeFullDuplexApi<>(this.connectionOptions,serviceOption); + } + + + /** + * Constructor initializes service options and creates a duplex communication API instance. + * + * param: param Request parameter + * param: callback Callback interface + * param: connectionOptions Connection options + */ + public MultiModalDialog( + MultiModalRequestParam param, MultiModalDialogCallback callback, ConnectionOptions connectionOptions) { + this.serviceOption = + ApiServiceOption.builder() + .protocol(Protocol.WEBSOCKET) + .streamingMode(StreamingMode.DUPLEX) + .outputMode(OutputMode.ACCUMULATE) + .taskGroup(TaskGroup.AIGC.getValue()) + .task(Task.MULTIMODAL_GENERATION.getValue()) + .function(Function.GENERATION.getValue()) + .build(); + this.connectionOptions = connectionOptions; + this.connectionOptions.setUseDefaultClient(false); + + this.requestParam = param; + this.callback = callback; + this.duplexApi = new SynchronizeFullDuplexApi<>(this.connectionOptions,serviceOption); } /** diff --git a/src/main/java/com/alibaba/dashscope/protocol/ClientProviders.java b/src/main/java/com/alibaba/dashscope/protocol/ClientProviders.java index f5f14f4..32d5336 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/ClientProviders.java +++ b/src/main/java/com/alibaba/dashscope/protocol/ClientProviders.java @@ -5,6 +5,7 @@ import com.alibaba.dashscope.protocol.okhttp.OkHttpClientFactory; import com.alibaba.dashscope.protocol.okhttp.OkHttpHttpClient; import com.alibaba.dashscope.protocol.okhttp.OkHttpWebSocketClient; +import com.alibaba.dashscope.protocol.okhttp.OkHttpWebSocketClientForAudio; public class ClientProviders { public static HalfDuplexClient getHalfDuplexClient(String protocol) { @@ -54,8 +55,14 @@ public static FullDuplexClient getFullDuplexClient( // create default config client, create default http client. return new OkHttpWebSocketClient(OkHttpClientFactory.getOkHttpClient(), passTaskStarted); } else { - return new OkHttpWebSocketClient( - OkHttpClientFactory.getNewOkHttpClient(connectionOptions), passTaskStarted); + if (connectionOptions.isUseDefaultClient()) { + return new OkHttpWebSocketClient( + OkHttpClientFactory.getNewOkHttpClient(connectionOptions), passTaskStarted); + }else { + // create custom client for audio models + return new OkHttpWebSocketClientForAudio( + OkHttpClientFactory.getNewOkHttpClient(connectionOptions), passTaskStarted); + } } } } diff --git a/src/main/java/com/alibaba/dashscope/protocol/ConnectionOptions.java b/src/main/java/com/alibaba/dashscope/protocol/ConnectionOptions.java index 1591220..06476ea 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/ConnectionOptions.java +++ b/src/main/java/com/alibaba/dashscope/protocol/ConnectionOptions.java @@ -31,6 +31,7 @@ public final class ConnectionOptions { private Duration connectTimeout; private Duration writeTimeout; private Duration readTimeout; + private boolean useDefaultClient = true; public Duration getConnectTimeout() { return getDuration(connectTimeout, DEFAULT_CONNECT_TIMEOUT, CONNECTION_TIMEOUT_ENV); @@ -84,4 +85,13 @@ public Proxy getProxy() { } return null; } + + public boolean isUseDefaultClient() { + return useDefaultClient; + } + + public void setUseDefaultClient(boolean useDefaultClient) { + this.useDefaultClient = useDefaultClient; + } + } diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java index b86bdb0..44235d8 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java @@ -41,9 +41,9 @@ public class OkHttpWebSocketClient extends WebSocketListener private AtomicBoolean isOpen = new AtomicBoolean(false); private AtomicBoolean isClosed = new AtomicBoolean(false); // indicate the first response is received. - private AtomicBoolean isFirstMessage = new AtomicBoolean(false); + protected AtomicBoolean isFirstMessage = new AtomicBoolean(false); // used for get request response - private FlowableEmitter responseEmitter; + protected FlowableEmitter responseEmitter; // is the result is flatten format. private boolean isFlattenResult; private FlowableEmitter connectionEmitter; @@ -363,7 +363,7 @@ public void onOpen(WebSocket webSocket, Response response) { } } - private void sendTextWithRetry( + protected void sendTextWithRetry( String apiKey, boolean isSecurityCheck, String message, @@ -402,7 +402,7 @@ private void sendTextWithRetry( } } - private void sendBinaryWithRetry( + protected void sendBinaryWithRetry( String apiKey, boolean isSecurityCheck, ByteString message, @@ -555,7 +555,7 @@ public void run() throws Exception { }); } - private CompletableFuture sendStreamRequest(FullDuplexRequest req) { + protected CompletableFuture sendStreamRequest(FullDuplexRequest req) { CompletableFuture future = CompletableFuture.runAsync( () -> { diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java new file mode 100644 index 0000000..05e9558 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java @@ -0,0 +1,165 @@ +package com.alibaba.dashscope.protocol.okhttp; + +import com.alibaba.dashscope.protocol.FullDuplexRequest; +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonObject; +import io.reactivex.Flowable; +import io.reactivex.functions.Action; +import lombok.extern.slf4j.Slf4j; +import okhttp3.OkHttpClient; +import okio.ByteString; + +import java.nio.ByteBuffer; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * @author songsong.shao + * @date 2025/11/5 + */ +@Slf4j +public class OkHttpWebSocketClientForAudio extends OkHttpWebSocketClient { + + private static final AtomicInteger STREAMING_REQUEST_THREAD_NUM = new AtomicInteger(0); + private static final AtomicBoolean SHUTDOWN_INITIATED = new AtomicBoolean(false); + + private static final ExecutorService STREAMING_REQUEST_EXECUTOR = + new ThreadPoolExecutor(1, 100, 60L, TimeUnit.SECONDS, new SynchronousQueue<>(), r -> { + Thread t = new Thread(r, "WS-STREAMING-REQ-Worker-" + STREAMING_REQUEST_THREAD_NUM.updateAndGet(n -> n == Integer.MAX_VALUE ? 0 : n + 1)); + t.setDaemon(true); + return t; + }); + + public OkHttpWebSocketClientForAudio(OkHttpClient client, boolean passTaskStarted) { + super(client, passTaskStarted); + log.info("Use OkHttpWebSocketClientForAudio"); + } + + @Override + protected CompletableFuture sendStreamRequest(FullDuplexRequest req) { + CompletableFuture future = + CompletableFuture.runAsync( + () -> { + try { + isFirstMessage.set(false); + + JsonObject startMessage = req.getStartTaskMessage(); + log.info("send run-task request {}", JsonUtils.toJson(startMessage)); + String taskId = + startMessage.get("header").getAsJsonObject().get("task_id").getAsString(); + // send start message out. + sendTextWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + JsonUtils.toJson(startMessage), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + + Flowable streamingData = req.getStreamingData(); + streamingData.subscribe( + data -> { + try { + if (data instanceof String) { + JsonObject continueData = req.getContinueMessage((String) data, taskId); + sendTextWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + JsonUtils.toJson(continueData), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } else if (data instanceof byte[]) { + sendBinaryWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + ByteString.of((byte[]) data), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } else if (data instanceof ByteBuffer) { + sendBinaryWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + ByteString.of((ByteBuffer) data), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } else { + JsonObject continueData = req.getContinueMessage(data, taskId); + sendTextWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + JsonUtils.toJson(continueData), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } + } catch (Throwable ex) { + log.error(String.format("sendStreamData exception: %s", ex.getMessage())); + responseEmitter.onError(ex); + } + }, + err -> { + log.error(String.format("Get stream data error!")); + responseEmitter.onError(err); + }, + new Action() { + @Override + public void run() throws Exception { + log.debug(String.format("Stream data send completed!")); + sendTextWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + JsonUtils.toJson(req.getFinishedTaskMessage(taskId)), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } + }); + } catch (Throwable ex) { + log.error(String.format("sendStreamData exception: %s", ex.getMessage())); + responseEmitter.onError(ex); + } + }); + return future; + } + + static {//auto close when jvm shutdown + Runtime.getRuntime().addShutdownHook(new Thread(OkHttpWebSocketClientForAudio::shutdownStreamingExecutor)); + } + /** + * Shutdown the streaming request executor gracefully. + * This method should be called when the application is shutting down + * to ensure proper resource cleanup. + */ + private static void shutdownStreamingExecutor() { + if (!SHUTDOWN_INITIATED.compareAndSet(false, true)) { + log.debug("Shutdown already in progress"); + return; + } + + if (!STREAMING_REQUEST_EXECUTOR.isShutdown()) { + log.debug("Shutting down streaming request executor..."); + STREAMING_REQUEST_EXECUTOR.shutdown(); + try { + // Wait up to 60 seconds for existing tasks to terminate + if (!STREAMING_REQUEST_EXECUTOR.awaitTermination(60, TimeUnit.SECONDS)) { + log.warn("Streaming request executor did not terminate in 60 seconds, forcing shutdown..."); + STREAMING_REQUEST_EXECUTOR.shutdownNow(); + // Wait up to 60 seconds for tasks to respond to being cancelled + if (!STREAMING_REQUEST_EXECUTOR.awaitTermination(60, TimeUnit.SECONDS)) { + log.error("Streaming request executor did not terminate"); + } + } + } catch (InterruptedException ie) { + // (Re-)Cancel if current thread also interrupted + STREAMING_REQUEST_EXECUTOR.shutdownNow(); + // Preserve interrupt status + Thread.currentThread().interrupt(); + } + log.info("Streaming request executor shut down completed"); + } + } +} From fa3c086a8e73b2ce4a108da024ab1d7bc27f1168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Wed, 5 Nov 2025 11:34:50 +0800 Subject: [PATCH 19/64] fix:websocket client for audio --- .../protocol/okhttp/OkHttpWebSocketClientForAudio.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java index 05e9558..d3e8654 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java @@ -122,7 +122,7 @@ public void run() throws Exception { log.error(String.format("sendStreamData exception: %s", ex.getMessage())); responseEmitter.onError(ex); } - }); + },STREAMING_REQUEST_EXECUTOR); return future; } From cc18a8b749678d103257d7b9ccb886800da5a95c Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Wed, 5 Nov 2025 11:51:09 +0800 Subject: [PATCH 20/64] release version 2.21.16 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b9139af..7209e0d 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.21.15 + 2.21.16 8 From e9b3eb5c84597a7792df35dc9a31abc3a170176c Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Wed, 5 Nov 2025 15:12:25 +0800 Subject: [PATCH 21/64] fix java doc error --- .../dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java index d3e8654..b7b38f1 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java @@ -16,7 +16,6 @@ /** * @author songsong.shao - * @date 2025/11/5 */ @Slf4j public class OkHttpWebSocketClientForAudio extends OkHttpWebSocketClient { From a1740f6405d7e67c0ee6c7b152beeedec86ed917 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Wed, 5 Nov 2025 19:06:13 +0800 Subject: [PATCH 22/64] release version 2.22.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index be7956e..687ef99 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.21.16 + 2.22.0 8 From 9431364662b02c9c311e806b184d771e220bf621 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Mon, 10 Nov 2025 16:07:41 +0800 Subject: [PATCH 23/64] feat(protocol): add user agent (#153) --- samples/GenerationStreamCall.java | 3 +- .../dashscope/aigc/generation/Generation.java | 10 +++++ .../MultiModalConversation.java | 10 +++++ .../dashscope/protocol/DashScopeHeaders.java | 39 ++++++++++++++++++- .../dashscope/protocol/HalfDuplexRequest.java | 10 ++++- .../okhttp/OkHttpWebSocketClient.java | 9 ++++- 6 files changed, 76 insertions(+), 5 deletions(-) diff --git a/samples/GenerationStreamCall.java b/samples/GenerationStreamCall.java index de5e1ff..3098e3a 100644 --- a/samples/GenerationStreamCall.java +++ b/samples/GenerationStreamCall.java @@ -35,8 +35,9 @@ public static void streamCall() GenerationParam param = GenerationParam.builder() .model("qwen-turbo") - .prompt("如何做土豆炖猪脚?") + .prompt("你好") .temperature((float) 1.0) + .incrementalOutput(false) .repetitionPenalty((float) 1.0) .topK(50) .build(); diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/Generation.java b/src/main/java/com/alibaba/dashscope/aigc/generation/Generation.java index 0f99b3f..80cdf3e 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/Generation.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/Generation.java @@ -162,6 +162,11 @@ public Flowable streamCall(HalfDuplexServiceParam param) // Intercept and modify incrementalOutput parameter if needed boolean toMergeResponse = modifyIncrementalOutput(param); + // Build custom user agent suffix with incremental_to_full flag + int flagValue = toMergeResponse ? 1 : 0; + String userAgentSuffix = String.format("incremental_to_full/%d", flagValue); + param.putHeader("user-agent", userAgentSuffix); + serviceOption.setIsSSE(true); serviceOption.setStreamingMode(StreamingMode.OUT); return syncApi.streamCall(param) @@ -193,6 +198,11 @@ public void streamCall(HalfDuplexServiceParam param, ResultCallback streamCall(MultiModalConversationP // Intercept and modify incrementalOutput parameter if needed boolean toMergeResponse = modifyIncrementalOutput(param); + // Build custom user agent suffix with incremental_to_full flag + int flagValue = toMergeResponse ? 1 : 0; + String userAgentSuffix = String.format("incremental_to_full/%d", flagValue); + param.putHeader("user-agent", userAgentSuffix); + serviceOption.setIsSSE(true); serviceOption.setStreamingMode(StreamingMode.OUT); preprocessInput(param); @@ -181,6 +186,11 @@ public void streamCall( // Intercept and modify incrementalOutput parameter if needed boolean toMergeResponse = modifyIncrementalOutput(param); + // Build custom user agent suffix with incremental_to_full flag + int flagValue = toMergeResponse ? 1 : 0; + String userAgentSuffix = String.format("incremental_to_full/%d", flagValue); + param.putHeader("user-agent", userAgentSuffix); + serviceOption.setIsSSE(true); serviceOption.setStreamingMode(StreamingMode.OUT); preprocessInput(param); diff --git a/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java b/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java index 1daca1c..73c60ae 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java +++ b/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java @@ -10,6 +10,11 @@ public final class DashScopeHeaders { public static String userAgent() { + return userAgent(null); + } + + // Generate user agent with optional custom suffix + public static String userAgent(String customUserAgent) { String userAgent = String.format( "dashscope/%s; java/%s; platform/%s; processor/%s", @@ -17,15 +22,30 @@ public static String userAgent() { System.getProperty("java.version"), System.getProperty("os.name"), System.getProperty("os.arch")); + if (customUserAgent != null && !customUserAgent.isEmpty()) { + userAgent += "; " + customUserAgent; + } return userAgent; } public static Map buildWebSocketHeaders( String apiKey, boolean isSecurityCheck, String workspace, Map customHeaders) throws NoApiKeyException { + return buildWebSocketHeaders(apiKey, isSecurityCheck, workspace, + customHeaders, null); + } + + // Build WebSocket headers with optional custom user agent suffix + public static Map buildWebSocketHeaders( + String apiKey, + boolean isSecurityCheck, + String workspace, + Map customHeaders, + String customUserAgent) + throws NoApiKeyException { Map headers = new HashMap<>(); headers.put("Authorization", "Bearer " + ApiKey.getApiKey(apiKey)); - headers.put("user-agent", userAgent()); + headers.put("user-agent", userAgent(customUserAgent)); if (workspace != null && !workspace.isEmpty()) { headers.put("X-DashScope-WorkSpace", workspace); } @@ -47,9 +67,24 @@ public static Map buildHttpHeaders( String workspace, Map customHeaders) throws NoApiKeyException { + return buildHttpHeaders(apiKey, isSecurityCheck, protocol, isSSE, + isAsyncTask, workspace, customHeaders, null); + } + + // Build HTTP headers with optional custom user agent suffix + public static Map buildHttpHeaders( + String apiKey, + Boolean isSecurityCheck, + Protocol protocol, + Boolean isSSE, + Boolean isAsyncTask, + String workspace, + Map customHeaders, + String customUserAgent) + throws NoApiKeyException { Map headers = new HashMap<>(); headers.put("Authorization", "Bearer " + ApiKey.getApiKey(apiKey)); - headers.put("user-agent", userAgent()); + headers.put("user-agent", userAgent(customUserAgent)); if (isSecurityCheck) { headers.put("X-DashScope-DataInspection", "enable"); } diff --git a/src/main/java/com/alibaba/dashscope/protocol/HalfDuplexRequest.java b/src/main/java/com/alibaba/dashscope/protocol/HalfDuplexRequest.java index cbb4230..8c125fd 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/HalfDuplexRequest.java +++ b/src/main/java/com/alibaba/dashscope/protocol/HalfDuplexRequest.java @@ -88,6 +88,13 @@ private String getEncryptionKeyHeader(EncryptionConfig encryptionConfig) throws } public HttpRequest getHttpRequest() throws NoApiKeyException, ApiException { + // Extract and filter custom user agent from param headers + Map paramHeaders = param.getHeaders(); + String customUserAgent = paramHeaders != null ? paramHeaders.get("user-agent") : null; + Map filteredHeaders = paramHeaders != null ? + new java.util.HashMap<>(paramHeaders) : new java.util.HashMap<>(); + filteredHeaders.remove("user-agent"); + Map requestHeaders = DashScopeHeaders.buildHttpHeaders( param.getApiKey(), @@ -96,7 +103,8 @@ public HttpRequest getHttpRequest() throws NoApiKeyException, ApiException { serviceOption.getIsSSE(), serviceOption.getIsAsyncTask(), param.getWorkspace(), - param.getHeaders()); + filteredHeaders, + customUserAgent); if (getHttpMethod() == HttpMethod.GET) { return HttpRequest.builder() diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java index 44235d8..cf3f703 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java @@ -62,12 +62,19 @@ private Request buildConnectionRequest( Map customHeaders, String baseWebSocketUrl) throws NoApiKeyException { + // Extract and filter custom user agent from param headers + String customUserAgent = customHeaders != null ? + customHeaders.get("user-agent") : null; + Map filteredHeaders = customHeaders != null ? + new java.util.HashMap<>(customHeaders) : new java.util.HashMap<>(); + filteredHeaders.remove("user-agent"); + // build the request builder. Builder bd = new Request.Builder(); bd.headers( Headers.of( DashScopeHeaders.buildWebSocketHeaders( - apiKey, isSecurityCheck, workspace, customHeaders))); + apiKey, isSecurityCheck, workspace, filteredHeaders, customUserAgent))); String url = Constants.baseWebsocketApiUrl; if (baseWebSocketUrl != null) { url = baseWebSocketUrl; From 3081ef91e2df1bb7d136c75e5fc594c514034b3f Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Wed, 12 Nov 2025 10:38:06 +0800 Subject: [PATCH 24/64] release version 2.22.1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 687ef99..723cef2 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.0 + 2.22.1 8 From cee5f8848e0471647fd69228b02cc72686f79c29 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Sun, 16 Nov 2025 15:53:03 +0800 Subject: [PATCH 25/64] feat(oss): reuse upload certificate (#154) --- samples/MultiModalConversationQwenVL.java | 27 +- .../MultiModalConversation.java | 18 +- .../dashscope/utils/OSSUploadCertificate.java | 41 +++ .../com/alibaba/dashscope/utils/OSSUtils.java | 67 +++- .../dashscope/utils/PreprocessInputImage.java | 142 +++++++- .../utils/PreprocessMessageInput.java | 327 +++++++++++++++--- .../alibaba/dashscope/utils/UploadResult.java | 24 ++ 7 files changed, 568 insertions(+), 78 deletions(-) create mode 100644 src/main/java/com/alibaba/dashscope/utils/OSSUploadCertificate.java create mode 100644 src/main/java/com/alibaba/dashscope/utils/UploadResult.java diff --git a/samples/MultiModalConversationQwenVL.java b/samples/MultiModalConversationQwenVL.java index 712efb1..83ec1e0 100644 --- a/samples/MultiModalConversationQwenVL.java +++ b/samples/MultiModalConversationQwenVL.java @@ -52,7 +52,7 @@ public String call() { return currentTime; } } - public static void videoImageListSample() throws ApiException, NoApiKeyException, UploadFileException { + public static void imageSample() throws ApiException, NoApiKeyException, UploadFileException { MultiModalConversation conversation = new MultiModalConversation(); MultiModalMessageItemText systemText = new MultiModalMessageItemText("你是达摩院的生活助手机器人。"); @@ -71,6 +71,28 @@ public static void videoImageListSample() throws ApiException, NoApiKeyException }); } + public static void videoSample() throws ApiException, NoApiKeyException, UploadFileException { + MultiModalConversation conv = new MultiModalConversation(); + MultiModalMessage systemMessage = MultiModalMessage.builder() + .role(Role.SYSTEM.getValue()) + .content(Arrays.asList(Collections.singletonMap("text", "You are a helpful assistant."))) + .build(); + MultiModalMessage userMessage = MultiModalMessage.builder() + .role(Role.USER.getValue()) + .content(Arrays.asList(Collections.singletonMap("video", Arrays.asList( + "/Users/zhiyi/Downloads/vl_data/1.jpg", + "/Users/zhiyi/Downloads/vl_data/2.jpg", + "/Users/zhiyi/Downloads/vl_data/3.jpg", + "/Users/zhiyi/Downloads/vl_data/4.jpg")), + Collections.singletonMap("text", "描述这个视频的具体s过程"))) + .build(); + MultiModalConversationParam param = MultiModalConversationParam.builder() + .model("qwen-vl-max-latest").message(systemMessage) + .message(userMessage).build(); + MultiModalConversationResult result = conv.call(param); + System.out.print(JsonUtils.toJson(result)); + } + public static void streamCallWithToolCalls() throws NoApiKeyException, ApiException, UploadFileException { SchemaGeneratorConfigBuilder configBuilder = @@ -133,7 +155,8 @@ public static void streamCallWithToolCalls() public static void main(String[] args) { try { - videoImageListSample(); +// imageSample(); + videoSample(); // streamCallWithToolCalls(); } catch (ApiException | NoApiKeyException | UploadFileException e) { System.out.println(e.getMessage()); diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversation.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversation.java index 9909d80..5e22664 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversation.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversation.java @@ -8,6 +8,7 @@ import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.exception.UploadFileException; import com.alibaba.dashscope.protocol.*; +import com.alibaba.dashscope.utils.OSSUploadCertificate; import com.alibaba.dashscope.utils.ParamUtils; import com.alibaba.dashscope.utils.PreprocessMessageInput; import io.reactivex.Flowable; @@ -225,19 +226,26 @@ public void onError(Exception e) { private void preprocessInput(MultiModalConversationParam param) throws NoApiKeyException, UploadFileException { boolean hasUpload = false; + OSSUploadCertificate certificate = null; + for (Object msg : param.getMessages()) { boolean isUpload = false; if (msg instanceof MultiModalConversationMessage) { - isUpload = + PreprocessMessageInput.PreprocessResult result = PreprocessMessageInput.preProcessMessageInputs( param.getModel(), ((MultiModalConversationMessage) msg).getContent(), - param.getApiKey()); - + param.getApiKey(), + certificate); + isUpload = result.hasUpload(); + certificate = result.getCertificate(); } else { - isUpload = + PreprocessMessageInput.PreprocessResult result = PreprocessMessageInput.preProcessMultiModalMessageInputs( - param.getModel(), (MultiModalMessage) msg, param.getApiKey()); + param.getModel(), (MultiModalMessage) msg, + param.getApiKey(), certificate); + isUpload = result.hasUpload(); + certificate = result.getCertificate(); } if (isUpload && !hasUpload) { hasUpload = true; diff --git a/src/main/java/com/alibaba/dashscope/utils/OSSUploadCertificate.java b/src/main/java/com/alibaba/dashscope/utils/OSSUploadCertificate.java new file mode 100644 index 0000000..7b15fc2 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/utils/OSSUploadCertificate.java @@ -0,0 +1,41 @@ +package com.alibaba.dashscope.utils; + +import lombok.Data; + +/** + * OSS upload certificate for reuse across multiple file uploads. + */ +@Data +public class OSSUploadCertificate { + private String uploadHost; + private String ossAccessKeyId; + private String signature; + private String policy; + private String uploadDir; + private String xOssObjectAcl; + private String xOssForbidOverwrite; + + /** + * Create certificate from upload info data. + * + * @param uploadHost OSS upload host + * @param ossAccessKeyId OSS access key ID + * @param signature Upload signature + * @param policy Upload policy + * @param uploadDir Upload directory + * @param xOssObjectAcl OSS object ACL + * @param xOssForbidOverwrite OSS forbid overwrite flag + */ + public OSSUploadCertificate(String uploadHost, String ossAccessKeyId, + String signature, String policy, String uploadDir, + String xOssObjectAcl, String xOssForbidOverwrite) { + this.uploadHost = uploadHost; + this.ossAccessKeyId = ossAccessKeyId; + this.signature = signature; + this.policy = policy; + this.uploadDir = uploadDir; + this.xOssObjectAcl = xOssObjectAcl; + this.xOssForbidOverwrite = xOssForbidOverwrite; + } +} + diff --git a/src/main/java/com/alibaba/dashscope/utils/OSSUtils.java b/src/main/java/com/alibaba/dashscope/utils/OSSUtils.java index 7c461c5..56a245a 100644 --- a/src/main/java/com/alibaba/dashscope/utils/OSSUtils.java +++ b/src/main/java/com/alibaba/dashscope/utils/OSSUtils.java @@ -33,22 +33,65 @@ @Slf4j public final class OSSUtils { + /** + * Upload file to OSS without certificate reuse. + * + * @param model Model name + * @param filePath Local file path + * @param apiKey API key + * @return OSS URL + * @throws NoApiKeyException If API key is missing + */ public static String upload(String model, String filePath, String apiKey) throws NoApiKeyException { + UploadResult result = uploadWithCertificate(model, filePath, apiKey, + null); + return result.getOssUrl(); + } + + /** + * Upload file to OSS with optional certificate reuse. + * + * @param model Model name + * @param filePath Local file path + * @param apiKey API key + * @param certificate Optional upload certificate for reuse + * @return UploadResult containing OSS URL and certificate + * @throws NoApiKeyException If API key is missing + */ + public static UploadResult uploadWithCertificate(String model, + String filePath, String apiKey, OSSUploadCertificate certificate) + throws NoApiKeyException { OkHttpClient client = OkHttpClientFactory.getOkHttpClient(); - DashScopeResult uploadInfo = get_upload_certificate(model, apiKey); - JsonObject outputData = ((JsonObject) uploadInfo.getOutput()).getAsJsonObject("data"); + OSSUploadCertificate cert = certificate; + + // Get certificate if not provided + if (cert == null) { + DashScopeResult uploadInfo = get_upload_certificate(model, apiKey); + JsonObject outputData = ((JsonObject) uploadInfo.getOutput()) + .getAsJsonObject("data"); + cert = new OSSUploadCertificate( + outputData.get("upload_host").getAsString(), + outputData.get("oss_access_key_id").getAsString(), + outputData.get("signature").getAsString(), + outputData.get("policy").getAsString(), + outputData.get("upload_dir").getAsString(), + outputData.get("x_oss_object_acl").getAsString(), + outputData.get("x_oss_forbid_overwrite").getAsString() + ); + } + Map headers = new HashMap<>(); headers.put("user-agent", DashScopeHeaders.userAgent()); headers.put("Accept", "application/json"); File uploadFile = new File(filePath); - String host = outputData.get("upload_host").getAsString(); - String ossAccessKeyId = outputData.get("oss_access_key_id").getAsString(); - String signature = outputData.get("signature").getAsString(); - String policy = outputData.get("policy").getAsString(); - String key = outputData.get("upload_dir").getAsString() + "/" + uploadFile.getName(); - String xOssObjectAcl = outputData.get("x_oss_object_acl").getAsString(); - String xOssForbidOverwrite = outputData.get("x_oss_forbid_overwrite").getAsString(); + String host = cert.getUploadHost(); + String ossAccessKeyId = cert.getOssAccessKeyId(); + String signature = cert.getSignature(); + String policy = cert.getPolicy(); + String key = cert.getUploadDir() + "/" + uploadFile.getName(); + String xOssObjectAcl = cert.getXOssObjectAcl(); + String xOssForbidOverwrite = cert.getXOssForbidOverwrite(); RequestBody requestBody = new MultipartBody.Builder() @@ -67,13 +110,15 @@ public static String upload(String model, String filePath, String apiKey) RequestBody.create(MediaType.parse(getContentType(filePath)), uploadFile)) .build(); - Request request = new Request.Builder().url(host).post(requestBody).build(); + Request request = new Request.Builder().url(host).post(requestBody) + .build(); try (Response response = client.newCall(request).execute()) { if (!response.isSuccessful()) { Status status = parseFailed(response); throw new ApiException(status); } - return String.format("oss://%s", key); + String ossUrl = String.format("oss://%s", key); + return new UploadResult(ossUrl, cert); } catch (Throwable e) { throw new ApiException(e); } diff --git a/src/main/java/com/alibaba/dashscope/utils/PreprocessInputImage.java b/src/main/java/com/alibaba/dashscope/utils/PreprocessInputImage.java index af42b25..0d3f0f3 100644 --- a/src/main/java/com/alibaba/dashscope/utils/PreprocessInputImage.java +++ b/src/main/java/com/alibaba/dashscope/utils/PreprocessInputImage.java @@ -10,32 +10,78 @@ public final class PreprocessInputImage { - public static boolean checkAndUploadImage( - String model, Map values, String apiKey) + /** + * Check and upload multiple images with certificate reuse support. + * + * @param model Model name + * @param values Map of image values + * @param apiKey API key + * @param certificate Optional upload certificate for reuse + * @return CheckAndUploadImageResult containing upload status and cert + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static CheckAndUploadImageResult checkAndUploadImages( + String model, Map values, String apiKey, + OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { boolean isUpload = false; + OSSUploadCertificate cert = certificate; for (Map.Entry entry : values.entrySet()) { String v = entry.getValue(); if (v == null || v.isEmpty()) { continue; } - String dstValue = checkAndUploadImage(model, apiKey, v); - if (!dstValue.equals(v)) { + CheckAndUploadOneImageResult result = + checkAndUploadOneImage(model, apiKey, v, cert); + if (!result.getFileUrl().equals(v)) { isUpload = true; - entry.setValue(dstValue); + entry.setValue(result.getFileUrl()); } + cert = result.getCertificate(); } - return isUpload; + return new CheckAndUploadImageResult(isUpload, cert); } - public static String checkAndUploadImage( - String model, String apiKey, String value) + /** + * Check and upload multiple images without certificate reuse (legacy). + * + * @param model Model name + * @param values Map of image values + * @param apiKey API key + * @return true if any file was uploaded + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static boolean checkAndUploadImage( + String model, Map values, String apiKey) + throws NoApiKeyException, UploadFileException { + CheckAndUploadImageResult result = checkAndUploadImages(model, + values, apiKey, null); + return result.isUpload(); + } + + /** + * Check and upload one image with certificate reuse support. + * + * @param model Model name + * @param apiKey API key + * @param value Image file path + * @param certificate Optional upload certificate for reuse + * @return CheckAndUploadOneImageResult containing file URL and cert + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static CheckAndUploadOneImageResult checkAndUploadOneImage( + String model, String apiKey, String value, + OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { String dstValue = value; + OSSUploadCertificate cert = certificate; if (value.startsWith("http")){ - return dstValue; + return new CheckAndUploadOneImageResult(dstValue, cert); } if (value.startsWith(ApiKeywords.FILE_PATH_SCHEMA)) { @@ -43,20 +89,86 @@ public static String checkAndUploadImage( URI fileURI = new URI(value); File f = new File(fileURI); if (f.exists()) { - String fileUrl = OSSUtils.upload(model, f.getAbsolutePath(), apiKey); - if (fileUrl.isEmpty()) { - throw new UploadFileException(String.format("Uploading file: %s failed", value)); + UploadResult result = OSSUtils.uploadWithCertificate( + model, f.getAbsolutePath(), apiKey, cert); + if (result.getOssUrl().isEmpty()) { + throw new UploadFileException(String.format( + "Uploading file: %s failed", value)); } - dstValue = fileUrl; + dstValue = result.getOssUrl(); + cert = result.getCertificate(); } else { - throw new UploadFileException(String.format("Local file: %s not exists.", value)); + throw new UploadFileException(String.format( + "Local file: %s not exists.", value)); } } catch (URISyntaxException e) { throw new UploadFileException(e.getMessage()); } } - return dstValue; + return new CheckAndUploadOneImageResult(dstValue, cert); + } + + /** + * Check and upload one image without certificate reuse (legacy). + * + * @param model Model name + * @param apiKey API key + * @param value Image file path + * @return File URL + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static String checkAndUploadImage( + String model, String apiKey, String value) + throws NoApiKeyException, UploadFileException { + CheckAndUploadOneImageResult result = checkAndUploadOneImage(model, + apiKey, value, null); + return result.getFileUrl(); + } + + /** + * Result of check and upload image operation. + */ + public static class CheckAndUploadImageResult { + private boolean upload; + private OSSUploadCertificate certificate; + + public CheckAndUploadImageResult(boolean upload, + OSSUploadCertificate certificate) { + this.upload = upload; + this.certificate = certificate; + } + + public boolean isUpload() { + return upload; + } + + public OSSUploadCertificate getCertificate() { + return certificate; + } + } + + /** + * Result of check and upload one image operation. + */ + public static class CheckAndUploadOneImageResult { + private String fileUrl; + private OSSUploadCertificate certificate; + + public CheckAndUploadOneImageResult(String fileUrl, + OSSUploadCertificate certificate) { + this.fileUrl = fileUrl; + this.certificate = certificate; + } + + public String getFileUrl() { + return fileUrl; + } + + public OSSUploadCertificate getCertificate() { + return certificate; + } } } diff --git a/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java b/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java index e3a196b..cf97e47 100644 --- a/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java +++ b/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java @@ -25,77 +25,154 @@ public static boolean isValidPath(String pathString) { } } - public static boolean checkAndUpload( - String model, MultiModalMessageItemBase message, String apiKey) + /** + * Check and upload file with certificate reuse support. + * + * @param model Model name + * @param message Message item containing file path + * @param apiKey API key + * @param certificate Optional upload certificate for reuse + * @return CheckAndUploadResult containing upload status and certificate + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static CheckAndUploadResult checkAndUpload( + String model, MultiModalMessageItemBase message, String apiKey, + OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { boolean isUpload = false; + OSSUploadCertificate cert = certificate; + if (!message.getModal().equals("text") && message.getContent().startsWith(ApiKeywords.FILE_PATH_SCHEMA)) { try { URI fileURI = new URI(message.getContent()); File f = new File(fileURI); if (f.exists()) { - String fileUrl = OSSUtils.upload(model, f.getAbsolutePath(), apiKey); - if (fileUrl == null) { + UploadResult result = OSSUtils.uploadWithCertificate(model, + f.getAbsolutePath(), apiKey, cert); + if (result.getOssUrl() == null) { throw new UploadFileException( - String.format("Uploading file: %s failed", message.getContent())); + String.format("Uploading file: %s failed", + message.getContent())); } - message.setContent(fileUrl); + message.setContent(result.getOssUrl()); + cert = result.getCertificate(); isUpload = true; } else { throw new UploadFileException( - String.format("Local file: %s not exists.", message.getContent())); + String.format("Local file: %s not exists.", + message.getContent())); } } catch (URISyntaxException e) { throw new UploadFileException(e.getMessage()); } - } else if (!message.getModal().equals("text") && message.getContent().startsWith("oss://")) { + } else if (!message.getModal().equals("text") + && message.getContent().startsWith("oss://")) { isUpload = true; - } else if (!message.getModal().equals("text") && !message.getContent().startsWith("http")) { + } else if (!message.getModal().equals("text") + && !message.getContent().startsWith("http")) { if (isValidPath(message.getContent())) { File f = new File(message.getContent()); if (f.exists()) { - String fileUrl = OSSUtils.upload(model, f.getAbsolutePath(), apiKey); - if (fileUrl == null) { + UploadResult result = OSSUtils.uploadWithCertificate(model, + f.getAbsolutePath(), apiKey, cert); + if (result.getOssUrl() == null) { throw new UploadFileException( - String.format("Uploading file: %s failed", message.getContent())); + String.format("Uploading file: %s failed", + message.getContent())); } - message.setContent(fileUrl); + message.setContent(result.getOssUrl()); + cert = result.getCertificate(); isUpload = true; } } } - return isUpload; + return new CheckAndUploadResult(isUpload, cert); } - public static boolean preProcessMessageInputs( - String model, List messages, String apiKey) throws NoApiKeyException, UploadFileException { + /** + * Preprocess message inputs with certificate reuse support. + * + * @param model Model name + * @param messages List of message items + * @param apiKey API key + * @param certificate Optional upload certificate for reuse + * @return PreprocessResult containing upload status and certificate + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static PreprocessResult + preProcessMessageInputs(String model, List messages, String apiKey, + OSSUploadCertificate certificate) + throws NoApiKeyException, UploadFileException { boolean hasUpload = false; + OSSUploadCertificate cert = certificate; + for (MultiModalMessageItemBase elem : messages) { - boolean isUpload = checkAndUpload(model, elem, apiKey); - if (isUpload && !hasUpload) { + CheckAndUploadResult result = checkAndUpload(model, elem, apiKey, + cert); + if (result.isUpload() && !hasUpload) { hasUpload = true; } + cert = result.getCertificate(); } - return hasUpload; + return new PreprocessResult(hasUpload, cert); } - public static String checkAndUploadOneMultiModalMessage( - String model, String apiKey, String key, String value) + /** + * Preprocess message inputs without certificate reuse (legacy). + * + * @param model Model name + * @param messages List of message items + * @param apiKey API key + * @return true if any file was uploaded + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static boolean + preProcessMessageInputs(String model, List messages, String apiKey) + throws NoApiKeyException, UploadFileException { + PreprocessResult result = preProcessMessageInputs(model, messages, + apiKey, null); + return result.hasUpload(); + } + + /** + * Check and upload one multimodal message with certificate reuse. + * + * @param model Model name + * @param apiKey API key + * @param key Message key + * @param value Message value (file path) + * @param certificate Optional upload certificate for reuse + * @return CheckAndUploadOneResult containing file URL and certificate + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static CheckAndUploadOneResult checkAndUploadOneMultiModalMessage( + String model, String apiKey, String key, String value, + OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { String dstValue = value; + OSSUploadCertificate cert = certificate; + if (value.startsWith(ApiKeywords.FILE_PATH_SCHEMA)) { try { URI fileURI = new URI(value); File f = new File(fileURI); if (f.exists()) { - String fileUrl = OSSUtils.upload(model, f.getAbsolutePath(), apiKey); - if (fileUrl == null) { - throw new UploadFileException(String.format("Uploading file: %s failed", value)); + UploadResult result = OSSUtils.uploadWithCertificate(model, + f.getAbsolutePath(), apiKey, cert); + if (result.getOssUrl() == null) { + throw new UploadFileException(String.format( + "Uploading file: %s failed", value)); } - dstValue = fileUrl; + dstValue = result.getOssUrl(); + cert = result.getCertificate(); } else { - throw new UploadFileException(String.format("Local file: %s not exists.", value)); + throw new UploadFileException(String.format( + "Local file: %s not exists.", value)); } } catch (URISyntaxException e) { throw new UploadFileException(e.getMessage()); @@ -104,24 +181,60 @@ public static String checkAndUploadOneMultiModalMessage( if (isValidPath(value)) { File f = new File(value); if (f.exists()) { - String fileUrl = OSSUtils.upload(model, f.getAbsolutePath(), apiKey); - if (fileUrl == null) { - throw new UploadFileException(String.format("Uploading file: %s failed", value)); + UploadResult result = OSSUtils.uploadWithCertificate(model, + f.getAbsolutePath(), apiKey, cert); + if (result.getOssUrl() == null) { + throw new UploadFileException(String.format( + "Uploading file: %s failed", value)); } - dstValue = fileUrl; + dstValue = result.getOssUrl(); + cert = result.getCertificate(); } } } - return dstValue; + return new CheckAndUploadOneResult(dstValue, cert); } - public static boolean checkAndUploadMultiModalMessage( - String model, Map.Entry entry, String apiKey) + /** + * Check and upload one multimodal message without certificate reuse. + * + * @param model Model name + * @param apiKey API key + * @param key Message key + * @param value Message value (file path) + * @return File URL + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static String checkAndUploadOneMultiModalMessage( + String model, String apiKey, String key, String value) + throws NoApiKeyException, UploadFileException { + CheckAndUploadOneResult result = checkAndUploadOneMultiModalMessage( + model, apiKey, key, value, null); + return result.getFileUrl(); + } + + /** + * Check and upload multimodal message with certificate reuse. + * + * @param model Model name + * @param entry Message entry + * @param apiKey API key + * @param certificate Optional upload certificate for reuse + * @return CheckAndUploadResult containing upload status and certificate + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static CheckAndUploadResult checkAndUploadMultiModalMessage( + String model, Map.Entry entry, String apiKey, + OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { boolean isUpload = false; + OSSUploadCertificate cert = certificate; String key = entry.getKey(); Object value = entry.getValue(); + if (value instanceof List) { List dstValue = (List) value; for (int i = 0; i < dstValue.size(); i++) { @@ -130,11 +243,14 @@ public static boolean checkAndUploadMultiModalMessage( if (!key.equals("text") && ((String)v).startsWith("oss://")) { isUpload = true; } else { - String dstV = checkAndUploadOneMultiModalMessage(model, apiKey, key, (String) v); - if (!dstV.equals(v)) { + CheckAndUploadOneResult result = + checkAndUploadOneMultiModalMessage(model, apiKey, key, + (String) v, cert); + if (!result.getFileUrl().equals(v)) { isUpload = true; - ((List) dstValue).set(i, dstV); + ((List) dstValue).set(i, result.getFileUrl()); } + cert = result.getCertificate(); } } } @@ -143,33 +259,154 @@ public static boolean checkAndUploadMultiModalMessage( if (!key.equals("text") && ((String)value).startsWith("oss://")) { isUpload = true; } else { - String dstValue = checkAndUploadOneMultiModalMessage(model, apiKey, key, (String) value); - if (!dstValue.equals(value)) { + CheckAndUploadOneResult result = + checkAndUploadOneMultiModalMessage(model, apiKey, key, + (String) value, cert); + if (!result.getFileUrl().equals(value)) { isUpload = true; - entry.setValue(dstValue); + entry.setValue(result.getFileUrl()); } + cert = result.getCertificate(); } } - return isUpload; + return new CheckAndUploadResult(isUpload, cert); } - public static boolean preProcessMultiModalMessageInputs( - String model, MultiModalMessage messages, String apiKey) + /** + * Check and upload multimodal message without certificate reuse. + * + * @param model Model name + * @param entry Message entry + * @param apiKey API key + * @return true if any file was uploaded + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static boolean checkAndUploadMultiModalMessage( + String model, Map.Entry entry, String apiKey) + throws NoApiKeyException, UploadFileException { + CheckAndUploadResult result = checkAndUploadMultiModalMessage(model, + entry, apiKey, null); + return result.isUpload(); + } + + /** + * Preprocess multimodal message inputs with certificate reuse. + * + * @param model Model name + * @param messages Multimodal message + * @param apiKey API key + * @param certificate Optional upload certificate for reuse + * @return PreprocessResult containing upload status and certificate + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static PreprocessResult preProcessMultiModalMessageInputs( + String model, MultiModalMessage messages, String apiKey, + OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { boolean hasUpload = false; + OSSUploadCertificate cert = certificate; List> content = new ArrayList<>(); + for (Map item : messages.getContent()) { content.add(new HashMap<>(item)); } for (Map item : content) { for (Map.Entry entry : item.entrySet()) { - boolean isUpload = checkAndUploadMultiModalMessage(model, entry, apiKey); - if (isUpload && !hasUpload) { + CheckAndUploadResult result = checkAndUploadMultiModalMessage( + model, entry, apiKey, cert); + if (result.isUpload() && !hasUpload) { hasUpload = true; } + cert = result.getCertificate(); } } messages.setContent(content); - return hasUpload; + return new PreprocessResult(hasUpload, cert); + } + + /** + * Preprocess multimodal message inputs without certificate reuse. + * + * @param model Model name + * @param messages Multimodal message + * @param apiKey API key + * @return true if any file was uploaded + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static boolean preProcessMultiModalMessageInputs( + String model, MultiModalMessage messages, String apiKey) + throws NoApiKeyException, UploadFileException { + PreprocessResult result = preProcessMultiModalMessageInputs(model, + messages, apiKey, null); + return result.hasUpload(); + } + + /** + * Result of check and upload operation. + */ + public static class CheckAndUploadResult { + private boolean upload; + private OSSUploadCertificate certificate; + + public CheckAndUploadResult(boolean upload, + OSSUploadCertificate certificate) { + this.upload = upload; + this.certificate = certificate; + } + + public boolean isUpload() { + return upload; + } + + public OSSUploadCertificate getCertificate() { + return certificate; + } + } + + /** + * Result of check and upload one operation. + */ + public static class CheckAndUploadOneResult { + private String fileUrl; + private OSSUploadCertificate certificate; + + public CheckAndUploadOneResult(String fileUrl, + OSSUploadCertificate certificate) { + this.fileUrl = fileUrl; + this.certificate = certificate; + } + + public String getFileUrl() { + return fileUrl; + } + + public OSSUploadCertificate getCertificate() { + return certificate; + } + } + + /** + * Result of preprocess operation. + */ + public static class PreprocessResult { + private boolean hasUpload; + private OSSUploadCertificate certificate; + + public PreprocessResult(boolean hasUpload, + OSSUploadCertificate certificate) { + this.hasUpload = hasUpload; + this.certificate = certificate; + } + + public boolean hasUpload() { + return hasUpload; + } + + public OSSUploadCertificate getCertificate() { + return certificate; + } } } diff --git a/src/main/java/com/alibaba/dashscope/utils/UploadResult.java b/src/main/java/com/alibaba/dashscope/utils/UploadResult.java new file mode 100644 index 0000000..d3135ba --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/utils/UploadResult.java @@ -0,0 +1,24 @@ +package com.alibaba.dashscope.utils; + +import lombok.Data; + +/** + * Result of file upload containing OSS URL and certificate. + */ +@Data +public class UploadResult { + private String ossUrl; + private OSSUploadCertificate certificate; + + /** + * Create upload result. + * + * @param ossUrl OSS URL of uploaded file + * @param certificate Upload certificate used + */ + public UploadResult(String ossUrl, OSSUploadCertificate certificate) { + this.ossUrl = ossUrl; + this.certificate = certificate; + } +} + From fd4f75388e3028860db023ad53531e9bca1ed7d1 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Sun, 16 Nov 2025 16:31:18 +0800 Subject: [PATCH 26/64] feat(search): support prepend_search_result option (#155) --- samples/GenerationStreamCall.java | 2 ++ .../alibaba/dashscope/aigc/generation/SearchOptions.java | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/samples/GenerationStreamCall.java b/samples/GenerationStreamCall.java index 3098e3a..dd7d021 100644 --- a/samples/GenerationStreamCall.java +++ b/samples/GenerationStreamCall.java @@ -109,12 +109,14 @@ public static void streamCallWithSearchOptions() .prompt("联网搜索明天杭州天气如何?") .enableSearch(true) .resultFormat("message") + .incrementalOutput(true) .searchOptions(SearchOptions.builder() .enableSource(true) .enableCitation(true) .citationFormat("[ref_]") .searchStrategy("pro_max") .forcedSearch(true) + .prependSearchResult(true) .build()) .build(); Flowable result = gen.streamCall(param); diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/SearchOptions.java b/src/main/java/com/alibaba/dashscope/aigc/generation/SearchOptions.java index 6b1d35b..45f039d 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/SearchOptions.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/SearchOptions.java @@ -33,4 +33,13 @@ public class SearchOptions { /** 搜索互联网信息的数量。standard:在请求时搜索5条互联网信息; pro:在请求时搜索10条互联网信息。 默认值为standard */ @SerializedName("search_strategy") private String searchStrategy; + + /** + * Whether the first data packet in streaming output contains only + * search source information. Only effective when enable_source is + * true and in streaming mode. Default is false. + */ + @SerializedName("prepend_search_result") + @Builder.Default + private Boolean prependSearchResult = null; } From 42a13a3e547033a85c5b69f034dd3a11e6db2d0b Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Tue, 18 Nov 2025 17:11:52 +0800 Subject: [PATCH 27/64] fix(message): message deserialization errror (#156) --- .../dashscope/common/MessageAdapter.java | 72 ++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/alibaba/dashscope/common/MessageAdapter.java b/src/main/java/com/alibaba/dashscope/common/MessageAdapter.java index 926c2c0..8c9905f 100644 --- a/src/main/java/com/alibaba/dashscope/common/MessageAdapter.java +++ b/src/main/java/com/alibaba/dashscope/common/MessageAdapter.java @@ -119,6 +119,69 @@ private void writeCallFunction(JsonWriter out, ToolCallFunction toolCall) throws out.endObject(); } + // Parse array of content objects + @SuppressWarnings({"unchecked", "rawtypes"}) + private List parseContentList(List contentList) { + List contents = new ArrayList<>(); + for (LinkedTreeMap contentItem : contentList) { + String type = (String) contentItem.get("type"); + if (ApiKeywords.CONTENT_TYPE_TEXT.equals(type)) { + contents.add(parseTextContent(contentItem)); + } else if (ApiKeywords.CONTENT_TYPE_IMAGE_URL.equals(type)) { + contents.add(parseImageContent(contentItem)); + } + } + return contents; + } + + // Parse text content with optional cache_control + @SuppressWarnings({"unchecked", "rawtypes"}) + private MessageContentText parseTextContent(LinkedTreeMap contentItem) { + MessageContentText.MessageContentTextBuilder textBuilder = + MessageContentText.builder() + .type((String) contentItem.get("type")) + .text((String) contentItem.get(ApiKeywords.CONTENT_TYPE_TEXT)); + + // Parse cache_control if present + if (contentItem.containsKey(ApiKeywords.CONTENT_TYPE_CACHE_CONTROL)) { + LinkedTreeMap cacheControlMap = + (LinkedTreeMap) contentItem.get( + ApiKeywords.CONTENT_TYPE_CACHE_CONTROL); + MessageContentText.CacheControl.CacheControlBuilder cacheBuilder = + MessageContentText.CacheControl.builder() + .type((String) cacheControlMap.get("type")); + + // Handle ttl field - convert to String regardless of input type + if (cacheControlMap.containsKey("ttl")) { + Object ttlObj = cacheControlMap.get("ttl"); + cacheBuilder.ttl(ttlObj instanceof Number + ? String.valueOf(((Number) ttlObj).intValue()) + : String.valueOf(ttlObj)); + } + textBuilder.cacheControl(cacheBuilder.build()); + } + return textBuilder.build(); + } + + // Parse image_url content + @SuppressWarnings({"unchecked", "rawtypes"}) + private MessageContentImageURL parseImageContent( + LinkedTreeMap contentItem) { + LinkedTreeMap imageUrlMap = + (LinkedTreeMap) contentItem.get( + ApiKeywords.CONTENT_TYPE_IMAGE_URL); + ImageURL.ImageURLBuilder imageBuilder = + ImageURL.builder().url((String) imageUrlMap.get("url")); + if (imageUrlMap.containsKey("detail")) { + imageBuilder.detail((String) imageUrlMap.get("detail")); + } + return MessageContentImageURL.builder() + .type((String) contentItem.get("type")) + .imageURL(imageBuilder.build()) + .build(); + } + + @SuppressWarnings("unchecked") private ToolCallFunction convertToCallFunction(LinkedTreeMap toolCall) { ToolCallFunction functionCall = new ToolCallFunction(); if (toolCall.containsKey("function")) { @@ -147,6 +210,7 @@ private ToolCallFunction convertToCallFunction(LinkedTreeMap too } @Override + @SuppressWarnings({"unchecked", "rawtypes"}) public Message read(JsonReader in) throws IOException { Map objectMap = JsonUtils.gson.fromJson(in, Map.class); Message msg = new Message(); @@ -157,7 +221,13 @@ public Message read(JsonReader in) throws IOException { } if (objectMap.containsKey(ApiKeywords.CONTENT)) { - msg.setContent((String) objectMap.get(ApiKeywords.CONTENT)); + Object contentObj = objectMap.get(ApiKeywords.CONTENT); + // Handle both string and array content types + if (contentObj instanceof String) { + msg.setContent((String) contentObj); + } else if (contentObj instanceof List) { + msg.setContents(parseContentList((List) contentObj)); + } objectMap.remove(ApiKeywords.CONTENT); } From 265c988dfbd1694f8a56ed85dff7dd0639ef6230 Mon Sep 17 00:00:00 2001 From: lengjiayi <31887169+lengjiayi@users.noreply.github.com> Date: Fri, 21 Nov 2025 16:59:35 +0800 Subject: [PATCH 28/64] [Add] QwenTranscription (#157) --- .../audio/qwen_asr/QwenTranscription.java | 72 +++++++++++++++++ .../QwenTranscriptionApiKeywords.java | 36 +++++++++ .../qwen_asr/QwenTranscriptionMetrics.java | 24 ++++++ .../qwen_asr/QwenTranscriptionParam.java | 62 +++++++++++++++ .../qwen_asr/QwenTranscriptionQueryParam.java | 29 +++++++ .../qwen_asr/QwenTranscriptionResult.java | 79 +++++++++++++++++++ .../qwen_asr/QwenTranscriptionTaskResult.java | 27 +++++++ 7 files changed, 329 insertions(+) create mode 100644 src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscription.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionApiKeywords.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionMetrics.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionParam.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionQueryParam.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionResult.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionTaskResult.java diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscription.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscription.java new file mode 100644 index 0000000..7f2983e --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscription.java @@ -0,0 +1,72 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope.audio.qwen_asr; + +import com.alibaba.dashscope.api.AsynchronousApi; +import com.alibaba.dashscope.common.Function; +import com.alibaba.dashscope.common.Task; +import com.alibaba.dashscope.common.TaskGroup; +import com.alibaba.dashscope.exception.ApiException; +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.alibaba.dashscope.protocol.ApiServiceOption; +import com.alibaba.dashscope.protocol.HttpMethod; +import com.alibaba.dashscope.protocol.Protocol; +import com.alibaba.dashscope.protocol.StreamingMode; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public final class QwenTranscription { + private final AsynchronousApi asyncApi; + private final ApiServiceOption createServiceOptions; + private final String baseUrl; + + public QwenTranscription() { + asyncApi = new AsynchronousApi(); + createServiceOptions = + ApiServiceOption.builder() + .protocol(Protocol.HTTP) + .httpMethod(HttpMethod.POST) + .streamingMode(StreamingMode.NONE) + .taskGroup(TaskGroup.AUDIO.getValue()) + .task(Task.ASR.getValue()) + .function(Function.TRANSCRIPTION.getValue()) + .isAsyncTask(true) + .build(); + this.baseUrl = null; + } + + public QwenTranscriptionResult asyncCall(QwenTranscriptionParam param) { + try { + return QwenTranscriptionResult.fromDashScopeResult( + asyncApi.asyncCall(param, createServiceOptions)); + } catch (NoApiKeyException e) { + throw new ApiException(e); + } + } + + public QwenTranscriptionResult wait(QwenTranscriptionQueryParam queryParam) { + try { + return QwenTranscriptionResult.fromDashScopeResult( + asyncApi.wait( + queryParam.getTaskId(), + queryParam.getApiKey(), + baseUrl, + queryParam.getCustomHeaders())); + } catch (NoApiKeyException e) { + throw new ApiException(e); + } + } + + public QwenTranscriptionResult fetch(QwenTranscriptionQueryParam queryParam) { + try { + return QwenTranscriptionResult.fromDashScopeResult( + asyncApi.fetch( + queryParam.getTaskId(), + queryParam.getApiKey(), + baseUrl, + queryParam.getCustomHeaders())); + } catch (NoApiKeyException e) { + throw new ApiException(e); + } + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionApiKeywords.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionApiKeywords.java new file mode 100644 index 0000000..d22f3f0 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionApiKeywords.java @@ -0,0 +1,36 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope.audio.qwen_asr; + +public class QwenTranscriptionApiKeywords { + + public static final String CHANNEL_ID = "channel_id"; + + public static final String TASK_ID = "task_id"; + + public static final String TASK_STATUS = "task_status"; + + public static final String TASK_RESULT = "result"; + + public static final String FILE_URL = "file_url"; + + public static final String TRANSCRIPTION_URL = "transcription_url"; + + public static final String SUBTASK_STATUS = "subtask_status"; + + public static final String TASK_METRICS = "task_metrics"; + + public static final String DIARIZATION_ENABLED = "diarization_enabled"; + + public static final String SPEAKER_COUNT = "speaker_count"; + + public static final String DISFLUENCY_REMOVAL_ENABLED = "disfluency_removal_enabled"; + + public static final String TIMESTAMP_ALIGNMENT_ENABLED = "timestamp_alignment_enabled"; + + public static final String SPECIAL_WORD_FILTER = "special_word_filter"; + + public static final String AUDIO_EVENT_DETECTION_ENABLED = "audio_event_detection_enabled"; + + public static final String VOCABULARY_ID = "vocabulary_id"; +} diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionMetrics.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionMetrics.java new file mode 100644 index 0000000..e00497c --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionMetrics.java @@ -0,0 +1,24 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope.audio.qwen_asr; + +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; +import lombok.Data; + +@Data +public class QwenTranscriptionMetrics { + @SerializedName("TOTAL") + private int total; + + @SerializedName("SUCCEEDED") + private int succeeded; + + @SerializedName("FAILED") + private int failed; + + public static QwenTranscriptionMetrics from(JsonObject asJsonObject) { + return JsonUtils.fromJsonObject(asJsonObject, QwenTranscriptionMetrics.class); + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionParam.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionParam.java new file mode 100644 index 0000000..16cf3fb --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionParam.java @@ -0,0 +1,62 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope.audio.qwen_asr; + +import com.alibaba.dashscope.base.HalfDuplexServiceParam; +import com.alibaba.dashscope.exception.InputRequiredException; +import com.alibaba.dashscope.utils.ApiKeywords; +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NonNull; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.List; + +@EqualsAndHashCode(callSuper = true) +@Data +@SuperBuilder +@Slf4j +public class QwenTranscriptionParam extends HalfDuplexServiceParam { + + @NonNull private String fileUrl; + + @Override + public JsonObject getHttpBody() { + JsonObject body = new JsonObject(); + body.addProperty("model", getModel()); + + JsonArray jsonChannelId = new JsonArray(); + + JsonObject jsonInput = new JsonObject(); + jsonInput.addProperty(QwenTranscriptionApiKeywords.FILE_URL, fileUrl); + body.add("input", jsonInput); + + JsonObject jsonParameters = JsonUtils.parametersToJsonObject(getParameters()); + body.add("parameters", jsonParameters); + log.debug("body=>{}", body); + return body; + } + + @Override + public Object getInput() { + JsonObject jsonInput = new JsonObject(); + jsonInput.addProperty(QwenTranscriptionApiKeywords.FILE_URL, fileUrl); + return jsonInput; + } + + @Override + public ByteBuffer getBinaryData() { + throw new UnsupportedOperationException("Unimplemented method 'getBinaryData'"); + } + + @Override + public void validate() throws InputRequiredException {} +} diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionQueryParam.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionQueryParam.java new file mode 100644 index 0000000..59ea6fe --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionQueryParam.java @@ -0,0 +1,29 @@ +package com.alibaba.dashscope.audio.qwen_asr; + +import lombok.Data; +import lombok.experimental.SuperBuilder; + +import java.util.Map; + +@Data +@SuperBuilder +public class QwenTranscriptionQueryParam { + private String taskId; + + private String apiKey; + + private Map headers; + + public Map getCustomHeaders() { + return headers; + } + + public static QwenTranscriptionQueryParam FromTranscriptionParam( + QwenTranscriptionParam param, String taskId) { + return QwenTranscriptionQueryParam.builder() + .apiKey(param.getApiKey()) + .taskId(taskId) + .headers(param.getHeaders()) + .build(); + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionResult.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionResult.java new file mode 100644 index 0000000..f0162e5 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionResult.java @@ -0,0 +1,79 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope.audio.qwen_asr; + +import com.alibaba.dashscope.common.DashScopeResult; +import com.alibaba.dashscope.common.TaskStatus; +import com.alibaba.dashscope.exception.ApiException; +import com.alibaba.dashscope.utils.ApiKeywords; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; +import lombok.Data; +import lombok.EqualsAndHashCode; + +import java.util.ArrayList; +import java.util.List; + +@Data +@EqualsAndHashCode() +public class QwenTranscriptionResult { + @SerializedName(ApiKeywords.REQUEST_ID) + private String requestId; + /** The model outputs. */ + private JsonObject output; + + /** The data usage. */ + private JsonObject usage; + + private TaskStatus taskStatus; + + private String taskId; + + private QwenTranscriptionTaskResult result; + + private QwenTranscriptionMetrics metrics; + + public static QwenTranscriptionResult fromDashScopeResult(DashScopeResult dashScopeResult) + throws ApiException { + QwenTranscriptionResult result = new QwenTranscriptionResult(); + result.output = (JsonObject) dashScopeResult.getOutput(); + if (dashScopeResult.getUsage() != null) { + result.usage = dashScopeResult.getUsage().getAsJsonObject(); + } + result.requestId = dashScopeResult.getRequestId(); + if (dashScopeResult.getOutput() != null) { + if (result.output.has(QwenTranscriptionApiKeywords.TASK_STATUS)) { + JsonElement jsonTaskStatus = result.output.get(QwenTranscriptionApiKeywords.TASK_STATUS); + if (jsonTaskStatus != null) { + result.taskStatus = TaskStatus.valueOf(jsonTaskStatus.getAsString()); + } else { + result.taskStatus = TaskStatus.FAILED; + } + } + if (result.output.has(QwenTranscriptionApiKeywords.TASK_ID)) { + result.taskId = result.output.get(QwenTranscriptionApiKeywords.TASK_ID).getAsString(); + } else { + result.taskId = null; + } + if (result.output.has(QwenTranscriptionApiKeywords.TASK_RESULT)) { + JsonElement jsonResult = result.output.get(QwenTranscriptionApiKeywords.TASK_RESULT); + if (jsonResult != null) { + result.result = QwenTranscriptionTaskResult.from(jsonResult.getAsJsonObject()); + } else { + result.result = new QwenTranscriptionTaskResult(); + } + } + if (result.output.has(QwenTranscriptionApiKeywords.TASK_METRICS)) { + JsonElement jsonMetrics = result.output.get(QwenTranscriptionApiKeywords.TASK_METRICS); + if (jsonMetrics != null) { + result.setMetrics(QwenTranscriptionMetrics.from(jsonMetrics.getAsJsonObject())); + } else { + result.setMetrics(new QwenTranscriptionMetrics()); + } + } + } + return result; + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionTaskResult.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionTaskResult.java new file mode 100644 index 0000000..eb1ce30 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionTaskResult.java @@ -0,0 +1,27 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope.audio.qwen_asr; + +import com.alibaba.dashscope.common.TaskStatus; +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; +import lombok.Data; + +@Data +public class QwenTranscriptionTaskResult { + @SerializedName("file_url") + String fileUrl; + + @SerializedName("transcription_url") + String transcriptionUrl; + + @SerializedName("subtask_status") + TaskStatus subTaskStatus; + + String message; + + public static QwenTranscriptionTaskResult from(JsonObject json) { + return JsonUtils.fromJsonObject(json, QwenTranscriptionTaskResult.class); + } +} From 6ccb42cb3843f6a33852588f3587a5c61186a862 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Sun, 23 Nov 2025 16:25:17 +0800 Subject: [PATCH 29/64] release version 2.22.2 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 723cef2..01a4539 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.1 + 2.22.2 8 From 7d340c88fbfba2c5041cffefec7474732c2ff8f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Fri, 7 Nov 2025 14:04:44 +0800 Subject: [PATCH 30/64] fix(app/multimodal-dialog): task-started callback --- .../dashscope/multimodal/MultiModalDialog.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java index 55ce234..f7da527 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java @@ -136,6 +136,7 @@ public MultiModalDialog( .taskGroup(TaskGroup.AIGC.getValue()) .task(Task.MULTIMODAL_GENERATION.getValue()) .function(Function.GENERATION.getValue()) + .passTaskStarted(true) .build(); this.requestParam = param; @@ -163,6 +164,7 @@ public MultiModalDialog( .taskGroup(TaskGroup.AIGC.getValue()) .task(Task.MULTIMODAL_GENERATION.getValue()) .function(Function.GENERATION.getValue()) + .passTaskStarted(true) .build(); this.connectionOptions = connectionOptions; this.connectionOptions.setUseDefaultClient(false); @@ -309,6 +311,13 @@ public void onEvent(DashScopeResult message) { default: break; } + }else if (message.getEvent() != null) { + if (message.getEvent().equals("task-started")){ + callback.onConnected(); + log.debug( + "MultiModalDialog connected, state is {}", + currentState.getValue()); // Logs connection status + } } } @@ -347,10 +356,6 @@ public void onError(Exception e) { // Error event handling stopLatch.get().countDown(); // Counts down latch } } - log.debug( - "MultiModalDialog connected, state is {}", - currentState.getValue()); // Logs connection status - callback.onConnected(); // Connected successfully callback } /** Starts upload speech. */ From 465e2e6810503d7b7d295c0cd9d3ec9d2678f343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Wed, 26 Nov 2025 14:57:44 +0800 Subject: [PATCH 31/64] feat(model/qwen3-livetranslate): add param corpus.phrases --- .../dashscope/audio/omni/OmniRealtimeConfig.java | 3 +++ .../dashscope/audio/omni/OmniRealtimeConstants.java | 1 + .../audio/omni/OmniRealtimeTranslationParam.java | 11 +++++++++++ 3 files changed, 15 insertions(+) diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java index 0334048..c317732 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java @@ -92,6 +92,9 @@ public JsonObject getConfig() { if (translationConfig != null) { Map translationConfig = new HashMap<>(); translationConfig.put(OmniRealtimeConstants.LANGUAGE, this.translationConfig.getLanguage()); + if (this.translationConfig.getCorpus() != null) { + translationConfig.put(OmniRealtimeConstants.TRANSLATION_CORPUS, this.translationConfig.getCorpus()); + } config.put(OmniRealtimeConstants.TRANSLATION, translationConfig); } else { config.put(OmniRealtimeConstants.TRANSLATION, null); diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java index 6b57da4..df7334a 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java @@ -19,6 +19,7 @@ public class OmniRealtimeConstants { // Translation constants public static final String TRANSLATION = "translation"; + public static final String TRANSLATION_CORPUS = "corpus"; public static final String LANGUAGE = "language"; public static final String SAMPLE_RATE = "sample_rate"; diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java index eef4a3d..a51ec9f 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java @@ -4,10 +4,21 @@ import lombok.Builder; import lombok.Data; +import java.util.Map; + /** @author songsong.shao */ @Builder @Data public class OmniRealtimeTranslationParam { /** language for translation */ private String language; + private Corpus corpus; + + @Builder + @Data + public static class Corpus { + /** Custom phrases to improve translation accuracy */ + private Map phrases; // translation phrases, + } + } \ No newline at end of file From 0ebb75bea5d9696a00e283bb6423c5dabe8d6d6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Mon, 1 Dec 2025 22:55:15 +0800 Subject: [PATCH 32/64] feat(model/qwen3-tts):add voice names --- .../AudioParameters.java | 93 ++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java index 1be2035..226912e 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java @@ -93,7 +93,98 @@ public enum Voice { KATERINA("Katerina"), @SerializedName("Elias") - ELIAS("Elias"); + ELIAS("Elias"), + + @SerializedName("Momo") + MOMO("Momo"), + + @SerializedName("Moon") + MOON("Moon"), + + @SerializedName("Maia") + MAIA("Maia"), + + @SerializedName("Kai") + KAI("Kai"), + + @SerializedName("Bella") + BELLA("Bella"), + + @SerializedName("Aiden") + AIDEN("Aiden"), + + @SerializedName("Eldric Saga") + ELDRIC_SAGA("Eldric Saga"), + + @SerializedName("Mia") + MIA("Mia"), + + @SerializedName("Mochi") + MOCHI("Mochi"), + + @SerializedName("Bellona") + BELLONA("Bellona"), + + @SerializedName("Vincent") + VINCENT("Vincent"), + + @SerializedName("Bunny") + BUNNY("Bunny"), + + @SerializedName("Neil") + NEIL("Neil"), + + @SerializedName("Arthur") + ARTHUR("Arthur"), + + @SerializedName("Nini") + NINI("Nini"), + + @SerializedName("Ebona") + EBONA("Ebona"), + + @SerializedName("Seren") + SEREN("Seren"), + + @SerializedName("Pip") + PIP("Pip"), + + @SerializedName("Stella") + STELLA("Stella"), + + @SerializedName("Bodega") + BODEGA("Bodega"), + + @SerializedName("Sonrisa") + SONRISA("Sonrisa"), + + @SerializedName("Alek") + ALEK("Alek"), + + @SerializedName("Dolce") + DOLCE("Dolce"), + + @SerializedName("Sohee") + SOHEE("Sohee"), + + @SerializedName("Ono Anna") + ONO_ANNA("Ono Anna"), + + @SerializedName("Lenn") + LENN("Lenn"), + + @SerializedName("Emilien") + EMILIEN("Emilien"), + + @SerializedName("Andre") + ANDRE("Andre"), + + @SerializedName("Radio Gol") + RADIO_GOL("Radio Gol"), + + @SerializedName("Vivian") + VIVIAN("Vivian"); + private final String value; From 5f830bbc5422d9d21c18cd80d43a9585d5f6848e Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Wed, 10 Dec 2025 18:51:16 +0800 Subject: [PATCH 33/64] release version 2.22.3 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 01a4539..8275c59 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.2 + 2.22.3 8 From 9f277f73cad3c29afcf134db33e336c33cc2c769 Mon Sep 17 00:00:00 2001 From: x-zm <602187256@qq.com> Date: Tue, 16 Dec 2025 19:49:56 +0800 Subject: [PATCH 34/64] support wan2.6 video generation (#162) Co-authored-by: mose-x.zm --- samples/VideoSynthesisUsage.java | 15 ++++++-- .../aigc/videosynthesis/VideoSynthesis.java | 5 +++ .../videosynthesis/VideoSynthesisParam.java | 36 +++++++++++++++++++ .../alibaba/dashscope/utils/ApiKeywords.java | 6 ++++ 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/samples/VideoSynthesisUsage.java b/samples/VideoSynthesisUsage.java index eade460..fa9c608 100644 --- a/samples/VideoSynthesisUsage.java +++ b/samples/VideoSynthesisUsage.java @@ -9,17 +9,25 @@ import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.task.AsyncTaskListParam; +import java.util.ArrayList; +import java.util.List; + public class VideoSynthesisUsage { /** * Create a video compositing task and wait for the task to complete. */ public static void basicCall() throws ApiException, NoApiKeyException, InputRequiredException { VideoSynthesis vs = new VideoSynthesis(); + List referenceVideoUrls = new ArrayList<>(); + referenceVideoUrls.add("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/with_human_voice_11s.mov"); VideoSynthesisParam param = VideoSynthesisParam.builder() - .model("wan2.5-t2v-preview") - .prompt("一只戴着绿色眼镜的小狗在唱rap") - .audioUrl("https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250925/ozwpvi/rap.mp3") + .model("wan2.6-r2v") + .prompt("一只小猫在月光下奔跑") + .referenceVideoUrls(referenceVideoUrls) + .shotType(VideoSynthesis.ShotType.MULTI) + .watermark(Boolean.TRUE) + .audio(Boolean.TRUE) .build(); VideoSynthesisResult result = vs.call(param); System.out.println(result); @@ -53,6 +61,7 @@ public static void main(String[] args) { // fetchTask("b451725d-c48f-4f08-9d26-xxx-xxx"); } catch (ApiException | NoApiKeyException | InputRequiredException e) { System.out.println(e.getMessage()); + e.printStackTrace(); } System.exit(0); } diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java index f043966..1dbc01e 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java @@ -54,6 +54,11 @@ public static class Resolution { public static final String DEFAULT = "720P"; } + public static class ShotType { + public static final String MULTI = "multi"; + public static final String SINGLE = "single"; + } + /** * Create ApiServiceOption * diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java index d05a9b5..225d957 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java @@ -12,7 +12,9 @@ import com.alibaba.dashscope.utils.PreprocessInputImage; import com.google.gson.JsonObject; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import lombok.Builder; import lombok.Data; @@ -55,6 +57,13 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { /** The input audio url. */ @Builder.Default private String audioUrl = null; + /** list of character reference video file urls uploaded by the user */ + @Builder.Default private List referenceVideoUrls = null; + + /** For the description information of the picture and sound of the reference video, corresponding to ref video, + * it needs to be in the order of the url. If the quantity is different, an error will be reported */ + @Builder.Default private List referenceVideoDescription = null; + /** The extra parameters. */ @GsonExclude @Singular protected Map extraInputs; @@ -82,6 +91,8 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { @Builder.Default private Boolean audio = null; + @Builder.Default private String shotType = null; + /** The inputs of the model. */ @Override public JsonObject getInput() { @@ -124,6 +135,14 @@ public JsonObject getInput() { jsonObject.addProperty(TAIL_FRAME, tailFrame); } + if (referenceVideoUrls != null && !referenceVideoUrls.isEmpty()) { + jsonObject.add(REFERENCE_VIDEO_URLS, JsonUtils.toJsonArray(referenceVideoUrls)); + } + + if (referenceVideoDescription != null && !referenceVideoDescription.isEmpty()) { + jsonObject.add(REFERENCE_VIDEO_DESCRIPTION, JsonUtils.toJsonArray(referenceVideoDescription)); + } + if (extraInputs != null && !extraInputs.isEmpty()) { JsonObject extraInputsJsonObject = JsonUtils.parametersToJsonObject(extraInputs); JsonUtils.merge(jsonObject, extraInputsJsonObject); @@ -164,6 +183,9 @@ public Map getParameters() { if (audio != null) { params.put(AUDIO, audio); } + if (shotType != null) { + params.put(SHOT_TYPE, shotType); + } params.putAll(super.getParameters()); return params; } @@ -200,6 +222,13 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { inputChecks.put(LAST_FRAME_URL, this.lastFrameUrl); inputChecks.put(HEAD_FRAME, this.headFrame); inputChecks.put(TAIL_FRAME, this.tailFrame); + int rvs = 0; + if (this.referenceVideoUrls != null) { + rvs = this.referenceVideoUrls.size(); + for (int i = 0; i < rvs; i++) { + inputChecks.put(REFERENCE_VIDEO_URLS + "[" + i + "]", this.referenceVideoUrls.get(i)); + } + } boolean isUpload = PreprocessInputImage.checkAndUploadImage(getModel(), inputChecks, getApiKey()); @@ -212,6 +241,13 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { this.lastFrameUrl = inputChecks.get(LAST_FRAME_URL); this.headFrame = inputChecks.get(HEAD_FRAME); this.tailFrame = inputChecks.get(TAIL_FRAME); + if (rvs > 0) { + List newVideos = new ArrayList<>(); + for (int i = 0; i < rvs; i++) { + newVideos.add(inputChecks.get(REFERENCE_VIDEO_URLS + "[" + i + "]")); + } + this.referenceVideoUrls = newVideos; + } } } diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index 61ca7f1..e56e77f 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -191,4 +191,10 @@ public class ApiKeywords { public static final String LANGUAGE_TYPE = "language_type"; public static final String IMAGES = "images"; + + public static final String REFERENCE_VIDEO_URLS = "reference_video_urls"; + + public static final String REFERENCE_VIDEO_DESCRIPTION = "reference_video_description"; + + public static final String SHOT_TYPE = "shot_type"; } From f3274ff114b352dd403ee1ed7b48691846b9713c Mon Sep 17 00:00:00 2001 From: x-zm <602187256@qq.com> Date: Wed, 17 Dec 2025 11:17:40 +0800 Subject: [PATCH 35/64] Add new parameters for generating video results in wan2.6 (#163) Co-authored-by: mose-x.zm --- samples/VideoSynthesisUsage.java | 11 +++++++++-- .../aigc/videosynthesis/VideoSynthesisOutput.java | 6 ++++++ .../aigc/videosynthesis/VideoSynthesisUsage.java | 13 +++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/samples/VideoSynthesisUsage.java b/samples/VideoSynthesisUsage.java index fa9c608..d6cb21e 100644 --- a/samples/VideoSynthesisUsage.java +++ b/samples/VideoSynthesisUsage.java @@ -19,15 +19,22 @@ public class VideoSynthesisUsage { public static void basicCall() throws ApiException, NoApiKeyException, InputRequiredException { VideoSynthesis vs = new VideoSynthesis(); List referenceVideoUrls = new ArrayList<>(); - referenceVideoUrls.add("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/with_human_voice_11s.mov"); + referenceVideoUrls.add("https://cdn.wanx.aliyuncs.com/wanx/1014827220770308/upload-video-cut/cda0f4dc063ec258184263691558af36.mp4"); + + List referenceVideoDescription = new ArrayList<>(); + referenceVideoDescription.add("这段视频展示一位年轻女性()身着灰色长袖上衣与裤子,乌黑长发垂落,面容清秀。她先低头后抬头,目光侧移,继而转身背对再面向镜头,动作流畅自然。背景为素净灰色墙面,环境简约无装饰。镜头由面部特写缓缓拉远至全身,光影柔和,突出人物形态与情绪。"); VideoSynthesisParam param = VideoSynthesisParam.builder() .model("wan2.6-r2v") - .prompt("一只小猫在月光下奔跑") + .prompt(" character1 站在海边,吹着海风,夕阳西下,阳光洒在她的脸上") .referenceVideoUrls(referenceVideoUrls) + .referenceVideoDescription(referenceVideoDescription) .shotType(VideoSynthesis.ShotType.MULTI) .watermark(Boolean.TRUE) .audio(Boolean.TRUE) + .duration(10) + .promptExtend(Boolean.TRUE) + .size("1280*720") .build(); VideoSynthesisResult result = vs.call(param); System.out.println(result); diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisOutput.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisOutput.java index 85a7cd6..8b470b9 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisOutput.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisOutput.java @@ -17,4 +17,10 @@ public class VideoSynthesisOutput { @SerializedName("video_url") private String videoUrl; + + @SerializedName("check_audio") + private String checkAudio; + + @SerializedName("orig_prompt") + private String origPrompt; } diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisUsage.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisUsage.java index e69ebc1..afda20b 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisUsage.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisUsage.java @@ -13,4 +13,17 @@ public class VideoSynthesisUsage { @SerializedName("video_ratio") private String videoRatio; + + private float duration; + + private String size; + + @SerializedName("input_video_duration") + private float inputVideoDuration; + + @SerializedName("output_video_duration") + private float outputVideoDuration; + + @SerializedName("SR") + private String SR; } From 34a84c2110ce42d855a0a2e0467e9c2af5eaa17e Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Wed, 17 Dec 2025 20:07:12 +0800 Subject: [PATCH 36/64] feat(http): support proxy through connection options (#164) --- samples/MultiModalConversationQwenVL.java | 8 ++++---- .../protocol/okhttp/OkHttpClientFactory.java | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/samples/MultiModalConversationQwenVL.java b/samples/MultiModalConversationQwenVL.java index 83ec1e0..97724b6 100644 --- a/samples/MultiModalConversationQwenVL.java +++ b/samples/MultiModalConversationQwenVL.java @@ -64,9 +64,9 @@ public static void imageSample() throws ApiException, NoApiKeyException, UploadF List messages = Arrays.asList(systemMessage, userMessage); MultiModalConversationParam param = MultiModalConversationParam.builder() .messages(messages) - .model("qvq-max").build(); //qwen3-vl-plus + .model("qwen3-vl-plus").build(); //qwen3-vl-plus Flowable flowable = conversation.streamCall(param); - flowable.forEach(result -> { + flowable.blockingForEach(result -> { System.out.println(JsonUtils.toJson(result)); }); } @@ -155,8 +155,8 @@ public static void streamCallWithToolCalls() public static void main(String[] args) { try { -// imageSample(); - videoSample(); + imageSample(); +// videoSample(); // streamCallWithToolCalls(); } catch (ApiException | NoApiKeyException | UploadFileException e) { System.out.println(e.getMessage()); diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpClientFactory.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpClientFactory.java index 3262070..f90076c 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpClientFactory.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpClientFactory.java @@ -75,11 +75,22 @@ public static OkHttpClient getOkHttpClient() { } public static OkHttpClient getNewOkHttpClient(ConnectionOptions connectionOptions) { - return Holder.INSTANCE + Builder builder = Holder.INSTANCE .newBuilder() .connectTimeout(connectionOptions.getConnectTimeout()) .readTimeout(connectionOptions.getReadTimeout()) - .writeTimeout(connectionOptions.getWriteTimeout()) - .build(); + .writeTimeout(connectionOptions.getWriteTimeout()); + + // Configure proxy if available + if (connectionOptions.getProxy() != null) { + builder.proxy(connectionOptions.getProxy()); + } + + // Configure proxy authenticator if available + if (connectionOptions.getProxyAuthenticator() != null) { + builder.proxyAuthenticator(connectionOptions.getProxyAuthenticator()); + } + + return builder.build(); } } From 8af018f22e0b09546f53f8fa8d9528b738f18bd3 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Thu, 18 Dec 2025 10:12:12 +0800 Subject: [PATCH 37/64] release version 2.22.4 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8275c59..d9b455e 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.3 + 2.22.4 8 From 8e12f9ae40bd50b5259a66512252bc1765234dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Tue, 16 Dec 2025 15:55:02 +0800 Subject: [PATCH 38/64] (app/multimodal-dialog): enable pre_task_id to set taskId --- .../com/alibaba/dashscope/multimodal/MultiModalDialog.java | 3 ++- .../dashscope/multimodal/MultiModalDialogApiKeyWords.java | 1 + .../alibaba/dashscope/multimodal/MultiModalRequestParam.java | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java index f7da527..f383907 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java @@ -221,11 +221,12 @@ public void start() { stopLatch = new AtomicReference<>(new CountDownLatch(1)); // Initializes stop signal latch + String preTaskId = requestParam.getTaskId() != null ? requestParam.getTaskId() : UUID.randomUUID().toString(); requestParamWithStream = MultiModalRequestParamWithStream.FromMultiModalParam( this.requestParam, dataFrames, - UUID.randomUUID().toString()); // Creates request parameter with stream + preTaskId); // Creates request parameter with stream try { this.duplexApi.duplexCall( diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java index a559f6c..462e5cd 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java @@ -13,6 +13,7 @@ public class MultiModalDialogApiKeyWords { public static String CONST_NAME_CLIENT_INFO = "client_info"; public static String CONST_NAME_BIZ_PARAMS = "biz_params"; public static String CONST_NAME_IMAGES = "images"; + public static String CONST_NAME_TASK_ID = "task_id"; public static String CONST_NAME_UP_STREAM_AUDIO_FORMAT = "audio_format"; public static String CONST_NAME_UP_STREAM_TYPE = "type"; diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java index 09128fd..9f6e908 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java @@ -29,6 +29,7 @@ public class MultiModalRequestParam extends FullDuplexServiceParam { private BizParams bizParams; private CustomInput customInput; private List images; + private String taskId; @Builder public static class CustomInput { @@ -263,6 +264,9 @@ public Map getParameters() { if (images != null) { params.put(CONST_NAME_IMAGES, images); } + if (this.parameters != null){ + params.putAll(this.parameters); + } return params; } From 6eaae41c2399da48ab9e237399f47998b64a3267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Tue, 23 Dec 2025 14:23:59 +0800 Subject: [PATCH 39/64] feat(modal/qwen3-asr): add endSession api --- .../audio/omni/OmniRealtimeConstants.java | 2 + .../audio/omni/OmniRealtimeConversation.java | 78 ++++++++++++++----- 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java index df7334a..bd84219 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java @@ -35,10 +35,12 @@ public class OmniRealtimeConstants { public static final String PROTOCOL_EVENT_TYPE_CLEAR_AUDIO = "input_audio_buffer.clear"; public static final String PROTOCOL_EVENT_TYPE_CREATE_RESPONSE = "response.create"; public static final String PROTOCOL_EVENT_TYPE_CANCEL_RESPONSE = "response.cancel"; + public static final String PROTOCOL_EVENT_TYPE_FINISH_SESSION = "session.finish"; public static final String PROTOCOL_RESPONSE_TYPE_SESSION_CREATED = "session.created"; public static final String PROTOCOL_RESPONSE_TYPE_RESPONSE_CREATED = "response.created"; public static final String PROTOCOL_RESPONSE_TYPE_AUDIO_TRANSCRIPT_DELTA = "response.audio_transcript.delta"; + public static final String PROTOCOL_RESPONSE_TYPE_SESSION_FINISHED = "session.finished"; public static final String PROTOCOL_RESPONSE_TYPE_AUDIO_DELTA = "response.audio.delta"; public static final String PROTOCOL_RESPONSE_TYPE_RESPONSE_DONE = "response.done"; } \ No newline at end of file diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java index b41f419..6510caa 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java @@ -25,6 +25,7 @@ /** @author lengjiayi */ @Slf4j public class OmniRealtimeConversation extends WebSocketListener { + private static final int DEFAULT_TIMEOUT = 20; private OmniRealtimeParam parameters; private OmniRealtimeCallback callback; @@ -38,6 +39,7 @@ public class OmniRealtimeConversation extends WebSocketListener { private long lastFirstAudioDelay = -1; private long lastFirstTextDelay = -1; private AtomicBoolean isClosed = new AtomicBoolean(false); + private final AtomicReference disconnectLatch = new AtomicReference<>(null); /** * Constructor @@ -73,6 +75,44 @@ public void connect() throws NoApiKeyException, InterruptedException { connectLatch.get().await(); } + // block wait server session done, max 20 seconds, then close connection + public void endSession() throws InterruptedException{ + endSession(DEFAULT_TIMEOUT); + } + + // block wait server session done ,then close connection + public void endSession(int timeout) throws InterruptedException{ + checkStatus(); + CountDownLatch latch = new CountDownLatch(1); + disconnectLatch.set(latch); + endSessionAsync(); + boolean finishSuccess = false; + try { + finishSuccess = latch.await(timeout, java.util.concurrent.TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + close(1011, "interrupted while waiting for session finish"); + throw e; + } + if (!finishSuccess) { + close(1011, "disconnect timeout after " + timeout + " seconds"); + } else { + close(1000, "bye"); + } + } + + // user need close connection manually after callback 'session.finished' + public void endSessionAsync() { + checkStatus(); + Map commit_request = new HashMap<>(); + commit_request.put(OmniRealtimeConstants.PROTOCOL_EVENT_ID, generateEventId()); + commit_request.put( + OmniRealtimeConstants.PROTOCOL_TYPE, OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_FINISH_SESSION); + sendMessage(createGson().toJson(commit_request), true); + } + + + /** * Update session configuration, should be used before create response * @@ -87,10 +127,7 @@ public void updateSession(OmniRealtimeConfig config) { OmniRealtimeConstants.PROTOCOL_TYPE, OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_UPDATE_SESSION); update_request.put(OmniRealtimeConstants.PROTOCOL_SESSION, configJson); - GsonBuilder builder = new GsonBuilder(); - builder.serializeNulls(); - Gson gson = builder.create(); - sendMessage(gson.toJson(update_request), true); + sendMessage(createGson().toJson(update_request), true); } /** @@ -108,10 +145,7 @@ public void appendAudio(String audioBase64) { OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_APPEND_AUDIO); append_request.put(OmniRealtimeConstants.PROTOCOL_AUDIO, audioBase64); log.debug("append audio with eid: {}, length: {}", event_id, audioBase64.length()); - GsonBuilder builder = new GsonBuilder(); - builder.serializeNulls(); - Gson gson = builder.create(); - sendMessage(gson.toJson(append_request), false); + sendMessage(createGson().toJson(append_request), false); } /** @@ -129,10 +163,7 @@ public void appendVideo(String videoBase64) { OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_APPEND_VIDEO); append_request.put(OmniRealtimeConstants.PROTOCOL_VIDEO, videoBase64); log.debug("append video with eid: " + event_id + ", length: " + videoBase64.length()); - GsonBuilder builder = new GsonBuilder(); - builder.serializeNulls(); - Gson gson = builder.create(); - sendMessage(gson.toJson(append_request), false); + sendMessage(createGson().toJson(append_request), false); } /** @@ -145,10 +176,7 @@ public void commit() { commit_request.put(OmniRealtimeConstants.PROTOCOL_EVENT_ID, generateEventId()); commit_request.put( OmniRealtimeConstants.PROTOCOL_TYPE, OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_COMMIT); - GsonBuilder builder = new GsonBuilder(); - builder.serializeNulls(); - Gson gson = builder.create(); - sendMessage(gson.toJson(commit_request), true); + sendMessage(createGson().toJson(commit_request), true); } /** clear the audio sent to server before. */ @@ -158,10 +186,7 @@ public void clearAppendedAudio() { clear_request.put(OmniRealtimeConstants.PROTOCOL_EVENT_ID, generateEventId()); clear_request.put( OmniRealtimeConstants.PROTOCOL_TYPE, OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_CLEAR_AUDIO); - GsonBuilder builder = new GsonBuilder(); - builder.serializeNulls(); - Gson gson = builder.create(); - sendMessage(gson.toJson(clear_request), true); + sendMessage(createGson().toJson(clear_request), true); } /** @@ -290,6 +315,10 @@ private String generateEventId() { return "event_" + java.util.UUID.randomUUID().toString().replace("-", ""); } + private Gson createGson() { + return new GsonBuilder().serializeNulls().create(); + } + private void sendMessage(String message, boolean enableLog) { if (enableLog == true) { log.debug("send message: " + message); @@ -351,6 +380,13 @@ public void onMessage(WebSocket webSocket, String text) { + lastFirstAudioDelay + " ms"); break; + case OmniRealtimeConstants.PROTOCOL_RESPONSE_TYPE_SESSION_FINISHED: + log.info("session: " + sessionId + " finished"); + CountDownLatch latch = disconnectLatch.get(); + if (latch != null) { + latch.countDown(); + } + break; } } } @@ -374,4 +410,4 @@ public void onClosing(@NotNull WebSocket webSocket, int code, @NotNull String re websocktetClient.close(code, reason); log.debug("WebSocket closing: " + code + ", " + reason); } -} +} \ No newline at end of file From 6dd3229b204082230cb71f92e2b5ac46d1b09dbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Tue, 23 Dec 2025 11:48:15 +0800 Subject: [PATCH 40/64] [Fix] recognition first package delay use first valid text --- .../dashscope/audio/asr/recognition/Recognition.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java index 31d2968..fa582b5 100644 --- a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java +++ b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java @@ -139,7 +139,8 @@ public Flowable streamCall( if (lastRequestId.get() == null && result.getRequestId() != null) { lastRequestId.set(result.getRequestId()); } - if (firstPackageTimeStamp < 0) { + if (firstPackageTimeStamp < 0 && result.getSentence()!=null + && result.getSentence().getText() != null && !result.getSentence().getText().isEmpty()) { firstPackageTimeStamp = System.currentTimeMillis(); log.debug("first package delay: " + getFirstPackageDelay()); } @@ -209,7 +210,9 @@ public void onEvent(DashScopeResult message) { lastRequestId.set(recognitionResult.getRequestId()); } if (!recognitionResult.isCompleteResult()) { - if (firstPackageTimeStamp < 0) { + if (firstPackageTimeStamp < 0 && recognitionResult.getSentence()!=null + && recognitionResult.getSentence().getText() != null + && !recognitionResult.getSentence().getText().isEmpty()) { firstPackageTimeStamp = System.currentTimeMillis(); log.debug("first package delay: " + getFirstPackageDelay()); } @@ -320,7 +323,9 @@ public String call(RecognitionParam param, File file) { if (lastRequestId.get() == null && recognitionResult.getRequestId() != null) { lastRequestId.set(recognitionResult.getRequestId()); } - if (!recognitionResult.isCompleteResult() && recognitionResult.isSentenceEnd()) { + if (!recognitionResult.isCompleteResult() && recognitionResult.getSentence()!=null + && recognitionResult.getSentence().getText() != null + && !recognitionResult.getSentence().getText().isEmpty()) { if (firstPackageTimeStamp < 0) { firstPackageTimeStamp = System.currentTimeMillis(); log.debug("first package delay: " + getFirstPackageDelay()); From 3401fece1ceef505356752a4bba91e5cec335c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Tue, 23 Dec 2025 11:38:55 +0800 Subject: [PATCH 41/64] (model/qwen-tts): add enbale_tn param --- .../audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java | 8 ++++++++ .../audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java | 1 + .../dashscope/audio/tts/SpeechSynthesisResult.java | 2 ++ 3 files changed, 11 insertions(+) diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java index d6463f0..5bd0600 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java @@ -44,6 +44,9 @@ public class QwenTtsRealtimeConfig { /** bitRate for tts , support 6~510,default is 128kbps. only work on format: opus/mp3 */ @Builder.Default Integer bitRate = null; + /** text normalization, default is false */ + @Builder.Default Boolean enableTn = false; + /** The extra parameters. */ @Builder.Default Map parameters = null; @@ -76,6 +79,11 @@ public JsonObject getConfig() { if (languageType != null) { config.put(QwenTtsRealtimeConstants.LANGUAGE_TYPE,languageType); } + + if (enableTn != null) { + config.put(QwenTtsRealtimeConstants.ENABLE_TN, enableTn); + } + if (parameters != null) { for (Map.Entry entry : parameters.entrySet()) { config.put(entry.getKey(), entry.getValue()); diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java index 589a219..f6eb119 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java @@ -14,6 +14,7 @@ public class QwenTtsRealtimeConstants { public static final String VOLUME = "volume"; public static final String BIT_RATE = "bit_rate"; public static final String LANGUAGE_TYPE = "language_type"; + public static final String ENABLE_TN = "enable_tn"; public static final String PROTOCOL_EVENT_ID = "event_id"; public static final String PROTOCOL_TYPE = "type"; public static final String PROTOCOL_SESSION = "session"; diff --git a/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisResult.java b/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisResult.java index de7c472..36f7c17 100644 --- a/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisResult.java +++ b/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisResult.java @@ -42,6 +42,8 @@ public static SpeechSynthesisResult fromDashScopeResult(DashScopeResult dashScop SpeechSynthesisResult result = new SpeechSynthesisResult(); if (dashScopeResult.getOutput() instanceof ByteBuffer) { result.audioFrame = cloneBuffer((ByteBuffer) dashScopeResult.getOutput()); + }else if (dashScopeResult.getOutput() instanceof JsonObject) { + result.output =(JsonObject) dashScopeResult.getOutput(); } try { if (dashScopeResult.getRequestId() != null) { From b2c848c287c6f4ecf16fa7bfe70aed39de102a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Thu, 25 Dec 2025 18:14:50 +0800 Subject: [PATCH 42/64] feat(model/qwen-tts):set enable_tn default to true --- .../audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java index 5bd0600..5da409f 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java @@ -44,8 +44,8 @@ public class QwenTtsRealtimeConfig { /** bitRate for tts , support 6~510,default is 128kbps. only work on format: opus/mp3 */ @Builder.Default Integer bitRate = null; - /** text normalization, default is false */ - @Builder.Default Boolean enableTn = false; + /** text normalization, default is true */ + @Builder.Default Boolean enableTn = true; /** The extra parameters. */ @Builder.Default Map parameters = null; From fb17d35e12e046a424af9e94cf2e39f01667a6a7 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Wed, 7 Jan 2026 15:30:42 +0800 Subject: [PATCH 43/64] release version 2.22.5 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d9b455e..bb0f1cc 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.4 + 2.22.5 8 From f6564eccbc9fd1ccd730a8374d06b8a63c3a1749 Mon Sep 17 00:00:00 2001 From: "mose-x.zm" Date: Thu, 8 Jan 2026 17:46:58 +0800 Subject: [PATCH 44/64] support wan2.6-image and wan2.6-t2i --- samples/ImageGenerationUsage.java | 138 +++++ .../aigc/imagegeneration/ImageGeneration.java | 543 ++++++++++++++++++ .../ImageGenerationListResult.java | 54 ++ .../ImageGenerationMessage.java | 20 + .../ImageGenerationOutput.java | 51 ++ .../imagegeneration/ImageGenerationParam.java | 168 ++++++ .../ImageGenerationResult.java | 45 ++ .../imagegeneration/ImageGenerationUsage.java | 50 ++ .../com/alibaba/dashscope/common/Task.java | 9 +- .../alibaba/dashscope/common/TaskGroup.java | 8 +- .../utils/PreprocessMessageInput.java | 27 + 11 files changed, 1105 insertions(+), 8 deletions(-) create mode 100644 samples/ImageGenerationUsage.java create mode 100644 src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGeneration.java create mode 100644 src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationListResult.java create mode 100644 src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationMessage.java create mode 100644 src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationOutput.java create mode 100644 src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java create mode 100644 src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationResult.java create mode 100644 src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationUsage.java diff --git a/samples/ImageGenerationUsage.java b/samples/ImageGenerationUsage.java new file mode 100644 index 0000000..27fdb3b --- /dev/null +++ b/samples/ImageGenerationUsage.java @@ -0,0 +1,138 @@ +import com.alibaba.dashscope.aigc.imagegeneration.*; +import com.alibaba.dashscope.common.Role; +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.alibaba.dashscope.exception.UploadFileException; +import com.alibaba.dashscope.task.AsyncTaskListParam; +import io.reactivex.Flowable; + +import java.util.Arrays; +import java.util.Collections; + +public class ImageGenerationUsage { + + private static final String DASHSCOPE_API_KEY = System.getenv("DASHSCOPE_API_KEY"); + + static ImageGenerationMessage t2iMessage = ImageGenerationMessage.builder() + .role(Role.USER.getValue()) + .content(Collections.singletonList( + Collections.singletonMap("text", "一间有着精致窗户的花店,漂亮的木质门,摆放着花朵") + )).build(); + + public static void t2iUsage() throws NoApiKeyException, UploadFileException{ + ImageGenerationParam param = ImageGenerationParam.builder() + .apiKey(DASHSCOPE_API_KEY) + .model(ImageGeneration.Models.WanX2_6_T2I) + .n(3) + .messages(Collections.singletonList(t2iMessage)) + .build(); + + ImageGeneration ig = new ImageGeneration(); + ImageGenerationResult res =ig.call(param); + System.out.println(res); + } + + public static void imageUsage() throws NoApiKeyException, UploadFileException{ + ImageGenerationMessage userMessage = ImageGenerationMessage.builder() + .role(Role.USER.getValue()) + .content(Arrays.asList( + // --------------- + // image 支持本地文件 + // --------------- + Collections.singletonMap("text", "参考图1的风格和图2的背景,生成番茄炒蛋"), + Collections.singletonMap("image", "https://cdn.wanx.aliyuncs.com/tmp/pressure/umbrella1.png"), + Collections.singletonMap("image", "https://img.alicdn.com/imgextra/i3/O1CN01SfG4J41UYn9WNt4X1_!!6000000002530-49-tps-1696-960.webp") + )).build(); + ImageGenerationParam param = ImageGenerationParam.builder() + .apiKey(DASHSCOPE_API_KEY) + .model(ImageGeneration.Models.WanX2_6_IMAGE) + .n(1) + .messages(Collections.singletonList(userMessage)) + .build(); + + ImageGeneration ig = new ImageGeneration(); + ImageGenerationResult res =ig.call(param); + System.out.println(res); + } + + + public static void t2iUsageAsync() throws NoApiKeyException, UploadFileException{ + ImageGenerationParam param = ImageGenerationParam.builder() + .apiKey(DASHSCOPE_API_KEY) + .model(ImageGeneration.Models.WanX2_6_T2I) + .n(1) + .messages(Collections.singletonList(t2iMessage)) + .build(); + + ImageGeneration ig = new ImageGeneration(); + ImageGenerationResult res = ig.asyncCall(param); + System.out.println(res); + + String taskId = res.getOutput().getTaskId(); + testAsyncTask(taskId); + } + + public static void testAsyncTask(String taskId) throws NoApiKeyException { + ImageGeneration ig = new ImageGeneration(); + System.out.println(); + System.out.println(); + System.out.println("-----------async-t2i-fetch-res-----------"); + ImageGenerationResult res = ig.fetch(taskId, DASHSCOPE_API_KEY); + System.out.println(res); + + try { + System.out.println(); + System.out.println(); + System.out.println("-----------async-t2i-cancel-res-----------"); + res = ig.cancel(taskId, DASHSCOPE_API_KEY); + System.out.println(res); + }catch (Exception e){ + System.out.println(e.getMessage()); + } + + System.out.println(); + System.out.println(); + System.out.println("-----------async-t2i-wait-res-----------"); + res = ig.wait(taskId, DASHSCOPE_API_KEY); + System.out.println(res); + + AsyncTaskListParam param = AsyncTaskListParam.builder().build(); + System.out.println(); + System.out.println(); + System.out.println("-----------async-task-list-res-----------"); + ImageGenerationListResult res2 = ig.list(param); + System.out.println(res2); + } + + public static void imageUsageStream() throws NoApiKeyException, UploadFileException{ + ImageGenerationMessage userMessage = ImageGenerationMessage.builder() + .role(Role.USER.getValue()) + .content(Collections.singletonList( + Collections.singletonMap("text", "给我一个1张图辣椒炒肉教程") + )).build(); + ImageGenerationParam param = ImageGenerationParam.builder() + .apiKey(DASHSCOPE_API_KEY) + .model(ImageGeneration.Models.WanX2_6_IMAGE) + .messages(Collections.singletonList(userMessage)) + .stream(true) + .enableInterleave(true) + .maxImages(1) + .build(); + + ImageGeneration ig = new ImageGeneration(); + Flowable res = ig.streamCall(param); + res.blockingForEach(System.out::println); + } + + public static void main(String[] args) { + try { + t2iUsage(); +// imageUsage(); +// t2iUsageAsync(); +// imageUsageStream(); + }catch (NoApiKeyException | UploadFileException e){ + System.out.println(e.getMessage()); + } + System.exit(0); + } + +} diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGeneration.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGeneration.java new file mode 100644 index 0000000..106cce3 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGeneration.java @@ -0,0 +1,543 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. +package com.alibaba.dashscope.aigc.imagegeneration; + +import com.alibaba.dashscope.api.AsynchronousApi; +import com.alibaba.dashscope.api.SynchronizeHalfDuplexApi; +import com.alibaba.dashscope.common.*; +import com.alibaba.dashscope.exception.ApiException; +import com.alibaba.dashscope.exception.InputRequiredException; +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.alibaba.dashscope.exception.UploadFileException; +import com.alibaba.dashscope.protocol.*; +import com.alibaba.dashscope.task.AsyncTaskListParam; +import com.alibaba.dashscope.tools.ToolCallBase; +import com.alibaba.dashscope.tools.ToolCallFunction; +import com.alibaba.dashscope.utils.OSSUploadCertificate; +import com.alibaba.dashscope.utils.ParamUtils; +import com.alibaba.dashscope.utils.PreprocessMessageInput; +import io.reactivex.Flowable; +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@Slf4j +public final class ImageGeneration { + /* Auto history messages */ + private final SynchronizeHalfDuplexApi syncApi; + private final AsynchronousApi asyncApi; + private final ApiServiceOption serviceOption; + private final String baseUrl; + + private final ThreadLocal> accumulatedDataMap = + ThreadLocal.withInitial(HashMap::new); + + public static class Models { + public static final String WanX2_6_T2I = "wan2.6-t2i"; + public static final String WanX2_6_IMAGE = "wan2.6-image"; + } + + private ApiServiceOption defaultSyncApiServiceOption() { + return ApiServiceOption.builder() + .protocol(Protocol.HTTP) + .httpMethod(HttpMethod.POST) + .streamingMode(StreamingMode.NONE) + .outputMode(OutputMode.ACCUMULATE) + .taskGroup(TaskGroup.AIGC.getValue()) + .function(Function.GENERATION.getValue()) + .build(); + } + + public ImageGeneration() { + serviceOption = defaultSyncApiServiceOption(); + syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); + asyncApi = new AsynchronousApi<>(); + this.baseUrl = null; + } + + public ImageGeneration(String protocol) { + serviceOption = defaultSyncApiServiceOption(); + serviceOption.setProtocol(Protocol.of(protocol)); + syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); + asyncApi = new AsynchronousApi<>(); + this.baseUrl = null; + } + + public ImageGeneration(String protocol, String baseUrl) { + serviceOption = defaultSyncApiServiceOption(); + serviceOption.setProtocol(Protocol.of(protocol)); + if (Protocol.HTTP.getValue().equals(protocol)) { + serviceOption.setBaseHttpUrl(baseUrl); + } else { + serviceOption.setBaseWebSocketUrl(baseUrl); + } + syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); + asyncApi = new AsynchronousApi<>(); + + this.baseUrl = baseUrl; + } + + public ImageGeneration( + String protocol, String baseUrl, ConnectionOptions connectionOptions) { + serviceOption = defaultSyncApiServiceOption(); + serviceOption.setProtocol(Protocol.of(protocol)); + if (Protocol.HTTP.getValue().equals(protocol)) { + serviceOption.setBaseHttpUrl(baseUrl); + } else { + serviceOption.setBaseWebSocketUrl(baseUrl); + } + syncApi = new SynchronizeHalfDuplexApi<>(connectionOptions, serviceOption); + asyncApi = new AsynchronousApi<>(); + + this.baseUrl = baseUrl; + } + + /** + * Call the server to get the whole result. + * + * @param param The input param of class `ImageGenerationParam`. + * @return The output structure of `ImageGenerationResult`. + * @throws NoApiKeyException Can not find api key + * @throws UploadFileException Fail upload failed. + */ + public ImageGenerationResult call(ImageGenerationParam param) + throws ApiException, NoApiKeyException, UploadFileException { + serviceOption.setIsSSE(false); + serviceOption.setStreamingMode(StreamingMode.NONE); + serviceOption.setTask(Task.MULTIMODAL_GENERATION.getValue()); + preprocessInput(param); + return ImageGenerationResult.fromDashScopeResult(syncApi.call(param)); + } + + /** + * Call the server to get the result in the callback function. + * + * @param param The input param of class `ImageGenerationParam`. + * @param callback The callback to receive response, the template class is + * `ImageGenerationResult`. + * @throws NoApiKeyException Can not find api key + * @throws ApiException The request failed, possibly due to a network or data error. + * @throws UploadFileException File upload failed. + */ + public void call( + ImageGenerationParam param, ResultCallback callback) + throws ApiException, NoApiKeyException, UploadFileException { + serviceOption.setIsSSE(false); + serviceOption.setStreamingMode(StreamingMode.NONE); + serviceOption.setTask(Task.MULTIMODAL_GENERATION.getValue()); + preprocessInput(param); + syncApi.call( + param, + new ResultCallback() { + @Override + public void onEvent(DashScopeResult message) { + callback.onEvent(ImageGenerationResult.fromDashScopeResult(message)); + } + + @Override + public void onComplete() { + callback.onComplete(); + } + + @Override + public void onError(Exception e) { + callback.onError(e); + } + }); + } + + /** + * Call the server to get the whole result. + * + * @param param The input param of class `ImageGenerationParam`. + * @return The output structure of `ImageGenerationResult`. + * @throws NoApiKeyException Can not find api key + * @throws UploadFileException Fail upload failed. + */ + public ImageGenerationResult asyncCall(ImageGenerationParam param) + throws ApiException, NoApiKeyException, UploadFileException { + preprocessInput(param); + serviceOption.setTask(Task.IMAGE_GENERATION.getValue()); + serviceOption.setIsAsyncTask(true); + return ImageGenerationResult.fromDashScopeResult(asyncApi.asyncCall(param, serviceOption)); + } + + public ImageGenerationListResult list(AsyncTaskListParam param) + throws ApiException, NoApiKeyException { + return ImageGenerationListResult.fromDashScopeResult(asyncApi.list(param, baseUrl)); + } + + public ImageGenerationListResult list( + String startTime, + String endTime, + String modelName, + String apiKeyId, + String region, + String status, + Integer pageNo, + Integer pageSize) + throws ApiException, NoApiKeyException { + return ImageGenerationListResult.fromDashScopeResult( + asyncApi.list( + startTime, endTime, modelName, apiKeyId, region, status, pageNo, pageSize, baseUrl)); + } + + public ImageGenerationResult fetch(String taskId, String apiKey) + throws ApiException, NoApiKeyException { + return ImageGenerationResult.fromDashScopeResult(asyncApi.fetch(taskId, apiKey, baseUrl)); + } + + public ImageGenerationResult fetch(ImageGenerationResult taskInfo, String apiKey) + throws ApiException, NoApiKeyException { + + return ImageGenerationResult.fromDashScopeResult( + asyncApi.fetch(taskInfo.getOutput().getTaskId(), apiKey, baseUrl)); + } + + public ImageGenerationResult cancel(String taskId, String apiKey) + throws ApiException, NoApiKeyException { + return ImageGenerationResult.fromDashScopeResult(asyncApi.cancel(taskId, apiKey, baseUrl)); + } + + public ImageGenerationResult cancel(ImageGenerationResult taskInfo, String apiKey) + throws ApiException, NoApiKeyException { + DashScopeResult res = asyncApi.cancel(taskInfo.getOutput().getTaskId(), apiKey, baseUrl); + return ImageGenerationResult.fromDashScopeResult(res); + } + + public ImageGenerationResult wait(String taskId, String apiKey) + throws ApiException, NoApiKeyException { + return ImageGenerationResult.fromDashScopeResult(asyncApi.wait(taskId, apiKey, baseUrl)); + } + + public ImageGenerationResult wait(ImageGenerationResult taskInfo, String apiKey) + throws ApiException, NoApiKeyException { + return ImageGenerationResult.fromDashScopeResult( + asyncApi.wait(taskInfo.getOutput().getTaskId(), apiKey, baseUrl)); + } + + + /** + * Call the server to get the result by stream. + * + * @param param The input param of class `ImageGenerationParam`. + * @return A `Flowable` of the output structure. + * @throws NoApiKeyException Can not find api key + * @throws ApiException The request failed, possibly due to a network or data error. + * @throws UploadFileException File upload failed. + */ + public Flowable streamCall(ImageGenerationParam param) + throws ApiException, NoApiKeyException, UploadFileException { + // Intercept and modify incrementalOutput parameter if needed + boolean toMergeResponse = modifyIncrementalOutput(param); + // Build custom user agent suffix with incremental_to_full flag + int flagValue = toMergeResponse ? 1 : 0; + String userAgentSuffix = String.format("incremental_to_full/%d", flagValue); + param.putHeader("user-agent", userAgentSuffix); + + serviceOption.setIsSSE(true); + serviceOption.setStreamingMode(StreamingMode.OUT); + serviceOption.setTask(Task.MULTIMODAL_GENERATION.getValue()); + preprocessInput(param); + return syncApi + .streamCall(param) + .map(ImageGenerationResult::fromDashScopeResult) + .map(result -> mergeSingleResponse(result, toMergeResponse)) + .doOnComplete(() -> { + if (toMergeResponse) { + clearAccumulatedData(); + } + }) + .doOnError(throwable -> { + if (toMergeResponse) { + clearAccumulatedData(); + } + }); + } + + /** + * Call the server to get the result by stream. + * + * @param param The input param of class `ImageGenerationParam`. + * @param callback The result callback. + * @throws NoApiKeyException Can not find api key + * @throws ApiException The request failed, possibly due to a network or data error. + * @throws InputRequiredException The input field is missing. + * @throws UploadFileException File upload failed. + */ + public void streamCall( + ImageGenerationParam param, ResultCallback callback) + throws ApiException, NoApiKeyException, InputRequiredException, UploadFileException { + param.validate(); + + // Intercept and modify incrementalOutput parameter if needed + boolean toMergeResponse = modifyIncrementalOutput(param); + + // Build custom user agent suffix with incremental_to_full flag + int flagValue = toMergeResponse ? 1 : 0; + String userAgentSuffix = String.format("incremental_to_full/%d", flagValue); + param.putHeader("user-agent", userAgentSuffix); + + serviceOption.setIsSSE(true); + serviceOption.setStreamingMode(StreamingMode.OUT); + serviceOption.setTask(Task.MULTIMODAL_GENERATION.getValue()); + preprocessInput(param); + syncApi.streamCall( + param, + new ResultCallback() { + @Override + public void onEvent(DashScopeResult msg) { + ImageGenerationResult result = ImageGenerationResult.fromDashScopeResult(msg); + ImageGenerationResult mergedResult = mergeSingleResponse(result, toMergeResponse); + callback.onEvent(mergedResult); + } + + @Override + public void onComplete() { + if (toMergeResponse) { + clearAccumulatedData(); + } + callback.onComplete(); + } + + @Override + public void onError(Exception e) { + if (toMergeResponse) { + clearAccumulatedData(); + } + callback.onError(e); + } + }); + } + + private void preprocessInput(ImageGenerationParam param) + throws NoApiKeyException, UploadFileException { + boolean hasUpload = false; + OSSUploadCertificate certificate = null; + for (ImageGenerationMessage msg : param.getMessages()) { + boolean isUpload; + PreprocessMessageInput.PreprocessResult result = + PreprocessMessageInput.preProcessMultiModalMessageInputs( + param.getModel(), msg, + param.getApiKey(), certificate); + isUpload = result.hasUpload(); + certificate = result.getCertificate(); + if (isUpload && !hasUpload) { + hasUpload = true; + } + } + if (hasUpload) { + param.putHeader("X-DashScope-OssResourceResolve", "enable"); + } + } + + /** + * Modifies the parameters for internal streaming optimization. + * If incrementalOutput is false, modifies the ImageGenerationParam object to set + * incrementalOutput to true for internal streaming optimization. + * + * @param param The parameter object to modify + * @return true if the parameter was modified, false otherwise + */ + private boolean modifyIncrementalOutput(ImageGenerationParam param) { + Boolean incrementalOutput = param.getIncrementalOutput(); + if (ParamUtils.shouldModifyIncrementalOutput(param.getModel()) && + Boolean.FALSE.equals(incrementalOutput)) { + // Modify the ImageGenerationParam object to enable incremental output + param.setIncrementalOutput(true); + return true; + } + return false; + } + + /** + * Merges a single ImageGenerationResult with accumulated data for non-incremental output simulation. + * This method accumulates text content and tool_calls from streaming responses. + * + * @param result The ImageGenerationResult to merge + * @param toMergeResponse Whether to perform merging (based on original incrementalOutput setting) + * @return The merged ImageGenerationResult + */ + private ImageGenerationResult mergeSingleResponse(ImageGenerationResult result, boolean toMergeResponse) { + if (!toMergeResponse || result == null || result.getOutput() == null) { + return result; + } + + Map accumulatedData = accumulatedDataMap.get(); + + // Handle choices format: output.choices[].message.content + if (result.getOutput().getChoices() != null) { + List choices = result.getOutput().getChoices(); + for (int choiceIdx = 0; choiceIdx < choices.size(); choiceIdx++) { + ImageGenerationOutput.Choice choice = choices.get(choiceIdx); + + // Initialize accumulated data for this choice if not exists + AccumulatedData accumulated = accumulatedData.computeIfAbsent( + choiceIdx, k -> new AccumulatedData()); + + if (choice.getMessage() != null) { + // Handle content accumulation (text content in content list) + List> currentContent = choice.getMessage().getContent(); + if (currentContent != null && !currentContent.isEmpty()) { + mergeTextContent(currentContent, accumulated); + } + // Always set the accumulated content if we have any + if (!accumulated.content.isEmpty()) { + choice.getMessage().setContent(accumulated.content); + } + } + } + } + + return result; + } + + /** + * Merges text content from current response with accumulated content. + * For MultiModal, content is a List> where text content is in maps with "text" key. + */ + private void mergeTextContent(List> currentContent, AccumulatedData accumulated) { + for (Map contentItem : currentContent) { + if (contentItem.containsKey("text")) { + String textValue = (String) contentItem.get("text"); + if (textValue != null && !textValue.isEmpty()) { + // Find or create text content item in accumulated content + Map accumulatedTextItem = null; + for (Map accItem : accumulated.content) { + if (accItem.containsKey("text")) { + accumulatedTextItem = accItem; + break; + } + } + + if (accumulatedTextItem == null) { + // Create new text content item + accumulatedTextItem = new HashMap<>(); + accumulatedTextItem.put("text", textValue); + accumulated.content.add(accumulatedTextItem); + } else { + // Append to existing text content + String existingText = (String) accumulatedTextItem.get("text"); + if (existingText == null) { + existingText = ""; + } + accumulatedTextItem.put("text", existingText + textValue); + } + } + } + } + } + + /** + * Merges tool calls from current response with accumulated tool calls. + */ + private void mergeToolCalls(List currentToolCalls, List accumulatedToolCalls) { + for (ToolCallBase currentCall : currentToolCalls) { + if (currentCall == null || currentCall.getIndex() == null) { + continue; + } + + int index = currentCall.getIndex(); + + // Find existing accumulated call with same index + ToolCallBase existingCall = null; + for (ToolCallBase accCall : accumulatedToolCalls) { + if (accCall != null && accCall.getIndex() != null && + accCall.getIndex().equals(index)) { + existingCall = accCall; + break; + } + } + + if (existingCall instanceof ToolCallFunction && + currentCall instanceof ToolCallFunction) { + // Merge function calls + ToolCallFunction existingFunctionCall = (ToolCallFunction) existingCall; + ToolCallFunction currentFunctionCall = (ToolCallFunction) currentCall; + + if (currentFunctionCall.getFunction() != null) { + // Ensure existing function call has a function object + if (existingFunctionCall.getFunction() == null) { + existingFunctionCall.setFunction(existingFunctionCall.new CallFunction()); + } + + // Accumulate arguments if present + if (currentFunctionCall.getFunction().getArguments() != null) { + String existingArguments = existingFunctionCall.getFunction().getArguments(); + if (existingArguments == null) { + existingArguments = ""; + } + String currentArguments = currentFunctionCall.getFunction().getArguments(); + existingFunctionCall.getFunction().setArguments(existingArguments + currentArguments); + } + + // Accumulate function name if present + if (currentFunctionCall.getFunction().getName() != null) { + String existingName = existingFunctionCall.getFunction().getName(); + if (existingName == null) { + existingName = ""; + } + String currentName = currentFunctionCall.getFunction().getName(); + existingFunctionCall.getFunction().setName(existingName + currentName); + } + + // Update function output if present + if (currentFunctionCall.getFunction().getOutput() != null) { + existingFunctionCall.getFunction().setOutput(currentFunctionCall.getFunction().getOutput()); + } + } + + // Update other fields with latest non-empty values + if (currentFunctionCall.getIndex() != null) { + existingFunctionCall.setIndex(currentFunctionCall.getIndex()); + } + if (currentFunctionCall.getId() != null && !currentFunctionCall.getId().isEmpty()) { + existingFunctionCall.setId(currentFunctionCall.getId()); + } + if (currentFunctionCall.getType() != null) { + existingFunctionCall.setType(currentFunctionCall.getType()); + } + } else { + // Add new tool call (create a copy) + if (currentCall instanceof ToolCallFunction) { + ToolCallFunction currentFunctionCall = (ToolCallFunction) currentCall; + ToolCallFunction newFunctionCall = new ToolCallFunction(); + newFunctionCall.setIndex(currentFunctionCall.getIndex()); + newFunctionCall.setId(currentFunctionCall.getId()); + newFunctionCall.setType(currentFunctionCall.getType()); + + if (currentFunctionCall.getFunction() != null) { + ToolCallFunction.CallFunction newCallFunction = newFunctionCall.new CallFunction(); + newCallFunction.setName(currentFunctionCall.getFunction().getName()); + newCallFunction.setArguments(currentFunctionCall.getFunction().getArguments()); + newCallFunction.setOutput(currentFunctionCall.getFunction().getOutput()); + newFunctionCall.setFunction(newCallFunction); + } + + accumulatedToolCalls.add(newFunctionCall); + } else { + // For other types of tool calls, add directly (assuming they are immutable or don't need merging) + accumulatedToolCalls.add(currentCall); + } + } + } + } + + /** + * Clears accumulated data for the current thread. + * Should be called when streaming is complete or encounters error. + */ + private void clearAccumulatedData() { + accumulatedDataMap.get().clear(); + accumulatedDataMap.remove(); + } + + /** + * Inner class to store accumulated data for response merging. + */ + private static class AccumulatedData { + List> content = new ArrayList<>(); + } +} diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationListResult.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationListResult.java new file mode 100644 index 0000000..d30a490 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationListResult.java @@ -0,0 +1,54 @@ +package com.alibaba.dashscope.aigc.imagegeneration; + +import com.alibaba.dashscope.common.AsyncTaskInfo; +import com.alibaba.dashscope.common.DashScopeResult; +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; + +import java.util.List; + +@Data +@Slf4j +public class ImageGenerationListResult { + @SerializedName("request_id") + private String requestId; + + private List data; + + private Integer total; + + @SerializedName("total_page") + private Integer totalPage; + + @SerializedName("page_no") + private Integer pageNo; + + @SerializedName("page_size") + private Integer pageSize; + + @SerializedName("status_code") + private Integer statusCode; + + private String code; + + private String message; + + public static ImageGenerationListResult fromDashScopeResult(DashScopeResult dashScopeResult) { + if (dashScopeResult.getOutput() != null) { + ImageGenerationListResult rs = + (JsonUtils.fromJsonObject( + (JsonObject) dashScopeResult.getOutput(), ImageGenerationListResult.class)); + rs.requestId = dashScopeResult.getRequestId(); + rs.statusCode = dashScopeResult.getStatusCode(); + rs.code = dashScopeResult.getCode(); + rs.message = dashScopeResult.getMessage(); + return rs; + } else { + log.error("Result no output: {}", dashScopeResult); + } + return null; + } +} diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationMessage.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationMessage.java new file mode 100644 index 0000000..9840b33 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationMessage.java @@ -0,0 +1,20 @@ +package com.alibaba.dashscope.aigc.imagegeneration; + +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; + +import java.util.List; +import java.util.Map; + +@Data +@SuperBuilder +@NoArgsConstructor +public class ImageGenerationMessage { + + /** The role, can be `user` and `bot`. */ + private String role; + + /** The conversation content. */ + private List> content; +} \ No newline at end of file diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationOutput.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationOutput.java new file mode 100644 index 0000000..bc045ca --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationOutput.java @@ -0,0 +1,51 @@ +package com.alibaba.dashscope.aigc.imagegeneration; + +import com.alibaba.dashscope.aigc.multimodalconversation.AudioResult; +import com.google.gson.annotations.SerializedName; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; + +import java.util.List; + +@Data +@SuperBuilder +@NoArgsConstructor +public class ImageGenerationOutput { + // output message. + @Data + @SuperBuilder + @NoArgsConstructor + public static class Choice { + @SerializedName("finish_reason") + private String finishReason; + + private ImageGenerationMessage message; + } + + private List choices; + + @SerializedName("finish_reason") + private String finishReason; + + @SerializedName("audio") + private AudioResult audio; + + @SerializedName("task_id") + private String taskId; + + @SerializedName("task_status") + private String taskStatus; + + @SerializedName("finished") + private Boolean finished; + + @SerializedName("submit_time") + private String submitTime; + + @SerializedName("scheduled_time") + private String scheduledTime; + + @SerializedName("end_time") + private String endTime; +} diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java new file mode 100644 index 0000000..804832b --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java @@ -0,0 +1,168 @@ +package com.alibaba.dashscope.aigc.imagegeneration; + +import com.alibaba.dashscope.base.HalfDuplexServiceParam; +import com.alibaba.dashscope.common.ResponseFormat; +import com.alibaba.dashscope.exception.InputRequiredException; +import com.alibaba.dashscope.utils.ApiKeywords; +import com.alibaba.dashscope.utils.JsonUtils; +import com.alibaba.dashscope.utils.ParamUtils; +import com.google.gson.JsonObject; +import lombok.*; +import lombok.experimental.SuperBuilder; + +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@EqualsAndHashCode(callSuper = true) +@Data +@SuperBuilder +public class ImageGenerationParam extends HalfDuplexServiceParam { + + @Singular private List messages; + + /* + * When generating, the seed of the random number is used to control the randomness of the model generation. + * If you use the same seed, each run will generate the same results; + * you can use the same seed when you need to reproduce the model's generated results. + * The seed parameter supports unsigned 64-bit integer types. Default value 1234 + */ + private Integer seed; + + /** + * Used to control the streaming output mode. If true, the subsequent output will include the + * previously input content by default. Otherwise, the subsequent output will not include the + * previously output content. Default: false eg(false): + * + *
+   * I
+   * I like
+   * I like apple
+   * when true:
+   * I
+   * like
+   * apple
+   * 
+ */ + @Builder.Default private Boolean incrementalOutput = null; + + /** Output format of the model including "text" and "audio". Default value: ["text"] */ + private List modalities; + + /** response format */ + private ResponseFormat responseFormat; + + /** negative prompt */ + private String negativePrompt; + + /** prompt extend */ + private Boolean promptExtend; + + /** watermark */ + private Boolean watermark; + + /** picture size */ + private String size; + + /** number of images */ + private Integer n; + + private Boolean enableInterleave; + + private Boolean stream; + + private Integer maxImages; + + @Override + public JsonObject getHttpBody() { + JsonObject requestObject = new JsonObject(); + requestObject.addProperty(ApiKeywords.MODEL, getModel()); + requestObject.add(ApiKeywords.INPUT, getInput()); + Map params = getParameters(); + if (params != null && !params.isEmpty()) { + requestObject.add(ApiKeywords.PARAMETERS, JsonUtils.parametersToJsonObject(params)); + } + return requestObject; + } + + @Override + public JsonObject getInput() { + JsonObject jsonObject = new JsonObject(); + jsonObject.add(ApiKeywords.MESSAGES, JsonUtils.toJsonArray(messages)); + return jsonObject; + } + + @Override + public Map getParameters() { + Map params = new HashMap<>(); + if (seed != null) { + params.put(ApiKeywords.SEED, seed); + } + // Apply different logic based on model version + if (ParamUtils.isQwenVersionThreeOrHigher(getModel())) { + if (incrementalOutput != null) { + params.put(ApiKeywords.INCREMENTAL_OUTPUT, incrementalOutput); + } + } else { + if (Boolean.TRUE.equals(incrementalOutput)) { + params.put(ApiKeywords.INCREMENTAL_OUTPUT, incrementalOutput); + } + } + + if (modalities != null) { + params.put(ApiKeywords.MODALITIES, modalities); + } + + if (responseFormat != null) { + params.put("response_format", responseFormat); + } + + if (negativePrompt != null) { + params.put("negative_prompt", negativePrompt); + } + + if (promptExtend != null) { + params.put("prompt_extend", promptExtend); + } + + if (watermark != null) { + params.put("watermark", watermark); + } + + if (size != null) { + params.put("size", size); + } + + if (n != null) { + params.put("n", n); + } + + if (enableInterleave != null) { + params.put("enable_interleave", enableInterleave); + } + + if (stream != null) { + params.put("stream", stream); + } + + if (maxImages != null) { + params.put("max_images", maxImages); + } + + params.putAll(parameters); + return params; + } + + @Override + public ByteBuffer getBinaryData() { + return null; + } + + @Override + public void validate() throws InputRequiredException { + if (messages == null || messages.isEmpty()) { + throw new InputRequiredException("Message must not null or empty!"); + } + } +} diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationResult.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationResult.java new file mode 100644 index 0000000..5fa6b42 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationResult.java @@ -0,0 +1,45 @@ +package com.alibaba.dashscope.aigc.imagegeneration; + +import com.alibaba.dashscope.common.DashScopeResult; +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonObject; +import com.google.gson.annotations.SerializedName; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Data +public class ImageGenerationResult { + private String requestId; + private ImageGenerationUsage usage; + private ImageGenerationOutput output; + + @SerializedName("status_code") + private Integer statusCode; + + private String code; + private String message; + + private ImageGenerationResult() {} + + public static ImageGenerationResult fromDashScopeResult(DashScopeResult dashScopeResult) { + ImageGenerationResult result = new ImageGenerationResult(); + result.setRequestId(dashScopeResult.getRequestId()); + result.setStatusCode(dashScopeResult.getStatusCode()); + result.setCode(dashScopeResult.getCode()); + result.setMessage(dashScopeResult.getMessage()); + if (dashScopeResult.getUsage() != null) { + result.setUsage( + JsonUtils.fromJsonObject( + dashScopeResult.getUsage().getAsJsonObject(), ImageGenerationUsage.class)); + } + if (dashScopeResult.getOutput() != null) { + result.setOutput( + JsonUtils.fromJsonObject( + (JsonObject) dashScopeResult.getOutput(), ImageGenerationOutput.class)); + } else { + log.error("Result no output: {}", dashScopeResult); + } + return result; + } +} diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationUsage.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationUsage.java new file mode 100644 index 0000000..f0689c6 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationUsage.java @@ -0,0 +1,50 @@ +package com.alibaba.dashscope.aigc.imagegeneration; + +import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationTokensDetails; +import com.google.gson.annotations.SerializedName; +import lombok.Data; + +@Data +public class ImageGenerationUsage { + @SerializedName("input_tokens") + private Integer inputTokens; + + @SerializedName("output_tokens") + private Integer outputTokens; + + @SerializedName("total_tokens") + private Integer totalTokens; + + @SerializedName("image_tokens") + private Integer imageTokens; + + @SerializedName("video_tokens") + private Integer videoTokens; + + @SerializedName("audio_tokens") + private Integer audioTokens; + + @SerializedName("image_count") + private Integer imageCount; + + @SerializedName("width") + private Integer width; + + @SerializedName("height") + private Integer height; + + @SerializedName("seconds") + private Integer seconds; + + @SerializedName("input_tokens_details") + private MultiModalConversationTokensDetails inputTokensDetails; + + @SerializedName("output_tokens_details") + private MultiModalConversationTokensDetails outputTokensDetails; + + @SerializedName("characters") + private Integer characters; + + @SerializedName("size") + private String size; +} diff --git a/src/main/java/com/alibaba/dashscope/common/Task.java b/src/main/java/com/alibaba/dashscope/common/Task.java index eab10f8..e09bd63 100644 --- a/src/main/java/com/alibaba/dashscope/common/Task.java +++ b/src/main/java/com/alibaba/dashscope/common/Task.java @@ -1,10 +1,14 @@ // Copyright (c) Alibaba, Inc. and its affiliates. package com.alibaba.dashscope.common; +import lombok.Getter; + +@Getter public enum Task { TEXT_GENERATION("text-generation"), CODE_GENERATION("code-generation"), MULTIMODAL_GENERATION("multimodal-generation"), + IMAGE_GENERATION("image-generation"), IMAGE_SYNTHESIS("text2image"), TEXT_EMBEDDING("text-embedding"), MULTIMODAL_EMBEDDING("multimodal-embedding"), @@ -16,11 +20,8 @@ public enum Task { private final String value; - private Task(String value) { + Task(String value) { this.value = value; } - public String getValue() { - return value; - } } diff --git a/src/main/java/com/alibaba/dashscope/common/TaskGroup.java b/src/main/java/com/alibaba/dashscope/common/TaskGroup.java index c10b9f3..7ffa369 100644 --- a/src/main/java/com/alibaba/dashscope/common/TaskGroup.java +++ b/src/main/java/com/alibaba/dashscope/common/TaskGroup.java @@ -1,6 +1,9 @@ // Copyright (c) Alibaba, Inc. and its affiliates. package com.alibaba.dashscope.common; +import lombok.Getter; + +@Getter public enum TaskGroup { AIGC("aigc"), EMBEDDINGS("embeddings"), @@ -11,11 +14,8 @@ public enum TaskGroup { private final String value; - private TaskGroup(String value) { + TaskGroup(String value) { this.value = value; } - public String getValue() { - return value; - } } diff --git a/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java b/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java index cf97e47..3dc187d 100644 --- a/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java +++ b/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java @@ -1,9 +1,11 @@ package com.alibaba.dashscope.utils; +import com.alibaba.dashscope.aigc.imagegeneration.ImageGenerationMessage; import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalMessageItemBase; import com.alibaba.dashscope.common.MultiModalMessage; import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.exception.UploadFileException; + import java.io.File; import java.net.URI; import java.net.URISyntaxException; @@ -326,6 +328,31 @@ public static PreprocessResult preProcessMultiModalMessageInputs( return new PreprocessResult(hasUpload, cert); } + public static PreprocessResult preProcessMultiModalMessageInputs( + String model, ImageGenerationMessage messages, String apiKey, + OSSUploadCertificate certificate) + throws NoApiKeyException, UploadFileException { + boolean hasUpload = false; + OSSUploadCertificate cert = certificate; + List> content = new ArrayList<>(); + + for (Map item : messages.getContent()) { + content.add(new HashMap<>(item)); + } + for (Map item : content) { + for (Map.Entry entry : item.entrySet()) { + CheckAndUploadResult result = checkAndUploadMultiModalMessage( + model, entry, apiKey, cert); + if (result.isUpload() && !hasUpload) { + hasUpload = true; + } + cert = result.getCertificate(); + } + } + messages.setContent(content); + return new PreprocessResult(hasUpload, cert); + } + /** * Preprocess multimodal message inputs without certificate reuse. * From fd1811e486c6720a8fc4f079976158fb47ba2df4 Mon Sep 17 00:00:00 2001 From: "mose-x.zm" Date: Mon, 12 Jan 2026 13:17:45 +0800 Subject: [PATCH 45/64] support wan2.6 video enableOverlays params --- .../videosynthesis/VideoSynthesisParam.java | 22 +++++++++++++------ .../alibaba/dashscope/utils/ApiKeywords.java | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java index 225d957..ef15d30 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java @@ -1,8 +1,6 @@ // Copyright (c) Alibaba, Inc. and its affiliates. package com.alibaba.dashscope.aigc.videosynthesis; -import static com.alibaba.dashscope.utils.ApiKeywords.*; - import com.alibaba.dashscope.base.HalfDuplexServiceParam; import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.exception.NoApiKeyException; @@ -11,17 +9,20 @@ import com.alibaba.dashscope.utils.JsonUtils; import com.alibaba.dashscope.utils.PreprocessInputImage; import com.google.gson.JsonObject; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.Singular; import lombok.experimental.SuperBuilder; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.alibaba.dashscope.utils.ApiKeywords.*; + @EqualsAndHashCode(callSuper = true) @Data @SuperBuilder @@ -93,6 +94,9 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { @Builder.Default private String shotType = null; + /** The enable_overlays parameter. */ + @Builder.Default private Boolean enableOverlays = null; + /** The inputs of the model. */ @Override public JsonObject getInput() { @@ -186,6 +190,10 @@ public Map getParameters() { if (shotType != null) { params.put(SHOT_TYPE, shotType); } + if (enableOverlays != null) { + params.put(ENABLE_OVERLAYS, enableOverlays); + } + params.putAll(super.getParameters()); return params; } diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index e56e77f..f318b80 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -197,4 +197,6 @@ public class ApiKeywords { public static final String REFERENCE_VIDEO_DESCRIPTION = "reference_video_description"; public static final String SHOT_TYPE = "shot_type"; + + public static final String ENABLE_OVERLAYS = "enable_overlays"; } From cce161d1d2cfe93ddb2830d536b27e4c0448db69 Mon Sep 17 00:00:00 2001 From: "mose-x.zm" Date: Thu, 15 Jan 2026 15:46:36 +0800 Subject: [PATCH 46/64] add video output params --- .../aigc/videosynthesis/VideoSynthesisOutput.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisOutput.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisOutput.java index 8b470b9..f7f1aa8 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisOutput.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisOutput.java @@ -23,4 +23,16 @@ public class VideoSynthesisOutput { @SerializedName("orig_prompt") private String origPrompt; + + @SerializedName("actual_prompt") + private String actualPrompt; + + @SerializedName("submit_time") + private String submitTime; + + @SerializedName("scheduled_time") + private String scheduledTime; + + @SerializedName("end_time") + private String endTime; } From 4789f846c2c73234a462acf07f24020ccf83302a Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Fri, 16 Jan 2026 10:32:24 +0800 Subject: [PATCH 47/64] release version 2.22.6 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index bb0f1cc..acb62eb 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.5 + 2.22.6 8 From 9cf3c1aac83a387981a470d0e1844423b5a5529f Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Thu, 22 Jan 2026 16:01:07 +0800 Subject: [PATCH 48/64] feat(ci): add linter action --- .github/workflows/linter.yml | 48 +++ .../dashscope/aigc/generation/Generation.java | 160 ++++---- .../aigc/generation/GenerationLogprobs.java | 35 +- .../GenerationOutputTokenDetails.java | 4 +- .../aigc/generation/GenerationParam.java | 6 +- .../aigc/generation/GenerationResult.java | 2 + .../aigc/generation/SearchOptions.java | 5 +- .../aigc/generation/TranslationOptions.java | 67 ++-- .../aigc/imagegeneration/ImageGeneration.java | 139 ++++--- .../ImageGenerationListResult.java | 3 +- .../ImageGenerationMessage.java | 15 +- .../ImageGenerationOutput.java | 3 +- .../imagegeneration/ImageGenerationParam.java | 5 +- .../aigc/imagesynthesis/ImageSynthesis.java | 21 +- .../imagesynthesis/ImageSynthesisParam.java | 9 +- .../AudioParameters.java | 1 - .../MultiModalConversation.java | 97 ++--- .../aigc/videosynthesis/VideoSynthesis.java | 30 +- .../videosynthesis/VideoSynthesisParam.java | 34 +- .../dashscope/app/ApplicationParam.java | 36 +- .../dashscope/app/CipServiceCodes.java | 8 +- .../alibaba/dashscope/app/FlowStreamMode.java | 32 +- .../dashscope/app/WorkflowMessage.java | 42 +-- .../dashscope/assistants/Assistant.java | 12 +- .../dashscope/assistants/AssistantParam.java | 12 +- .../dashscope/assistants/Assistants.java | 23 +- .../audio/asr/recognition/Recognition.java | 20 +- .../asr/vocabulary/VocabularyService.java | 14 +- .../audio/omni/OmniRealtimeConfig.java | 27 +- .../audio/omni/OmniRealtimeConstants.java | 2 +- .../audio/omni/OmniRealtimeConversation.java | 34 +- .../omni/OmniRealtimeTranscriptionParam.java | 66 ++-- .../omni/OmniRealtimeTranslationParam.java | 23 +- .../qwen_asr/QwenTranscriptionParam.java | 8 +- .../qwen_asr/QwenTranscriptionQueryParam.java | 5 +- .../qwen_asr/QwenTranscriptionResult.java | 4 - .../qwen_tts_realtime/QwenTtsRealtime.java | 1 + .../QwenTtsRealtimeConfig.java | 2 +- .../QwenTtsRealtimeParam.java | 2 +- .../audio/tts/SpeechSynthesisResult.java | 4 +- .../audio/tts/SpeechSynthesizer.java | 5 +- .../ttsv2/SpeechSynthesisAudioFormat.java | 1 - .../audio/ttsv2/SpeechSynthesisParam.java | 14 +- .../audio/ttsv2/SpeechSynthesizer.java | 4 +- .../enrollment/VoiceEnrollmentParam.java | 2 - .../enrollment/VoiceEnrollmentService.java | 14 +- .../dashscope/common/MessageAdapter.java | 22 +- .../dashscope/common/MessageContentText.java | 4 +- .../dashscope/common/MultiModalMessage.java | 5 +- .../common/MultiModalMessageAdapter.java | 17 +- .../com/alibaba/dashscope/common/Task.java | 1 - .../alibaba/dashscope/common/TaskGroup.java | 1 - .../embeddings/TextEmbeddingParam.java | 28 +- .../TextEmbeddingSparseEmbedding.java | 1 - .../multimodal/MultiModalDialog.java | 146 ++++---- .../MultiModalDialogApiKeyWords.java | 5 +- .../multimodal/MultiModalDialogCallback.java | 45 +-- .../multimodal/MultiModalRequestParam.java | 42 +-- .../alibaba/dashscope/multimodal/State.java | 7 +- .../dashscope/multimodal/tingwu/TingWu.java | 97 +++-- .../multimodal/tingwu/TingWuParam.java | 10 +- .../multimodal/tingwu/TingWuRealtime.java | 53 +-- .../tingwu/TingWuRealtimeCallback.java | 26 +- .../tingwu/TingWuRealtimeParam.java | 54 ++- .../tingwu/TingWuRealtimeResult.java | 1 - .../dashscope/protocol/ClientProviders.java | 6 +- .../dashscope/protocol/ConnectionOptions.java | 1 - .../dashscope/protocol/DashScopeHeaders.java | 7 +- .../dashscope/protocol/FullDuplexRequest.java | 2 +- .../dashscope/protocol/HalfDuplexRequest.java | 4 +- .../protocol/okhttp/OkHttpClientFactory.java | 11 +- .../okhttp/OkHttpWebSocketClient.java | 7 +- .../okhttp/OkHttpWebSocketClientForAudio.java | 286 ++++++++------- .../alibaba/dashscope/rerank/TextReRank.java | 8 +- .../dashscope/rerank/TextReRankParam.java | 20 +- .../alibaba/dashscope/utils/ApiKeywords.java | 16 +- .../alibaba/dashscope/utils/Constants.java | 4 +- .../dashscope/utils/OSSUploadCertificate.java | 16 +- .../com/alibaba/dashscope/utils/OSSUtils.java | 31 +- .../alibaba/dashscope/utils/ParamUtils.java | 115 +++--- .../dashscope/utils/PreprocessInputImage.java | 277 +++++++------- .../utils/PreprocessMessageInput.java | 126 +++---- .../alibaba/dashscope/utils/UploadResult.java | 5 +- .../alibaba/dashscope/TestImageSynthesis.java | 68 ++-- .../TestMultiModalConversationQwenTTS.java | 26 +- .../dashscope/TestMultimodalDialog.java | 345 +++++++++--------- .../dashscope/TestQwenTtsRealtime.java | 229 ++++++------ .../alibaba/dashscope/TestVideoSynthesis.java | 53 ++- 88 files changed, 1591 insertions(+), 1738 deletions(-) create mode 100644 .github/workflows/linter.yml diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml new file mode 100644 index 0000000..9b83046 --- /dev/null +++ b/.github/workflows/linter.yml @@ -0,0 +1,48 @@ +# +# Copyright 2024-2026 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +name: 👀 Linter +on: + push: + branches: + - main + - master + pull_request: + branches: + - main + - master +permissions: + contents: read +jobs: + linter: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Set up JDK 8 + uses: actions/setup-java@v4 + with: + java-version: '8' + distribution: 'temurin' + - name: Check code formatting + run: | + # Check if any files need formatting + if ! java -jar .dev_tools/google-java-format-1.7-all-deps.jar \ + --dry-run --set-exit-if-changed \ + $(find . -type f -name "*.java" | grep "./*/src/.*java"); then + echo "❌ Code formatting issues found!" + echo "Please run 'bash lint.sh' locally to fix formatting issues." + exit 1 + fi + echo "✅ All files are properly formatted" diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/Generation.java b/src/main/java/com/alibaba/dashscope/aigc/generation/Generation.java index 80cdf3e..27a4300 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/Generation.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/Generation.java @@ -17,15 +17,15 @@ import com.alibaba.dashscope.protocol.HttpMethod; import com.alibaba.dashscope.protocol.Protocol; import com.alibaba.dashscope.protocol.StreamingMode; +import com.alibaba.dashscope.tools.ToolCallBase; +import com.alibaba.dashscope.tools.ToolCallFunction; import com.alibaba.dashscope.utils.ParamUtils; import io.reactivex.Flowable; -import lombok.extern.slf4j.Slf4j; +import java.util.ArrayList; import java.util.HashMap; -import java.util.Map; import java.util.List; -import java.util.ArrayList; -import com.alibaba.dashscope.tools.ToolCallBase; -import com.alibaba.dashscope.tools.ToolCallFunction; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; @Slf4j public final class Generation { @@ -169,26 +169,29 @@ public Flowable streamCall(HalfDuplexServiceParam param) serviceOption.setIsSSE(true); serviceOption.setStreamingMode(StreamingMode.OUT); - return syncApi.streamCall(param) + return syncApi + .streamCall(param) .map(GenerationResult::fromDashScopeResult) - .flatMap(result -> { - GenerationResult merged = - mergeSingleResponse(result, toMergeResponse, param); - if (merged == null) { - return Flowable.empty(); - } - return Flowable.just(merged); - }) - .doOnComplete(() -> { - if (toMergeResponse) { - clearAccumulatedData(); - } - }) - .doOnError(throwable -> { - if (toMergeResponse) { - clearAccumulatedData(); - } - }); + .flatMap( + result -> { + GenerationResult merged = mergeSingleResponse(result, toMergeResponse, param); + if (merged == null) { + return Flowable.empty(); + } + return Flowable.just(merged); + }) + .doOnComplete( + () -> { + if (toMergeResponse) { + clearAccumulatedData(); + } + }) + .doOnError( + throwable -> { + if (toMergeResponse) { + clearAccumulatedData(); + } + }); } public void streamCall(HalfDuplexServiceParam param, ResultCallback callback) @@ -236,9 +239,9 @@ public void onError(Exception e) { } /** - * Modifies the parameters for internal streaming optimization. - * If incrementalOutput is false, modifies the GenerationParam object to set - * incrementalOutput to true for internal streaming optimization. + * Modifies the parameters for internal streaming optimization. If incrementalOutput is false, + * modifies the GenerationParam object to set incrementalOutput to true for internal streaming + * optimization. * * @param param The parameter object to modify * @return true if the parameter was modified, false otherwise @@ -248,8 +251,8 @@ private boolean modifyIncrementalOutput(HalfDuplexServiceParam param) { if (param instanceof GenerationParam) { GenerationParam generationParam = (GenerationParam) param; Boolean incrementalOutput = generationParam.getIncrementalOutput(); - if (ParamUtils.shouldModifyIncrementalOutput(param.getModel()) && - Boolean.FALSE.equals(incrementalOutput)) { + if (ParamUtils.shouldModifyIncrementalOutput(param.getModel()) + && Boolean.FALSE.equals(incrementalOutput)) { // Modify the GenerationParam object to enable incremental output generationParam.setIncrementalOutput(true); return true; @@ -259,20 +262,17 @@ private boolean modifyIncrementalOutput(HalfDuplexServiceParam param) { } /** - * Merges a single GenerationResult with accumulated data for - * non-incremental output simulation. - * This method accumulates content and tool_calls from streaming responses. - * Supports both legacy format (output.text) and new format - * (output.choices[].message.content). + * Merges a single GenerationResult with accumulated data for non-incremental output simulation. + * This method accumulates content and tool_calls from streaming responses. Supports both legacy + * format (output.text) and new format (output.choices[].message.content). * * @param result The GenerationResult to merge - * @param toMergeResponse Whether to perform merging (based on original - * incrementalOutput setting) + * @param toMergeResponse Whether to perform merging (based on original incrementalOutput setting) * @param param The HalfDuplexServiceParam to get n parameter * @return The merged GenerationResult, or null if should be filtered out */ - private GenerationResult mergeSingleResponse(GenerationResult result, - boolean toMergeResponse, HalfDuplexServiceParam param) { + private GenerationResult mergeSingleResponse( + GenerationResult result, boolean toMergeResponse, HalfDuplexServiceParam param) { if (!toMergeResponse || result == null || result.getOutput() == null) { return result; } @@ -291,8 +291,7 @@ private GenerationResult mergeSingleResponse(GenerationResult result, // Check if all choices have been sent (for n > 1 case) if (n > 1 && !accumulatedData.isEmpty()) { - boolean allSent = accumulatedData.values().stream() - .allMatch(data -> data.allChoicesSent); + boolean allSent = accumulatedData.values().stream().allMatch(data -> data.allChoicesSent); if (allSent) { return null; } @@ -315,13 +314,12 @@ private GenerationResult mergeSingleResponse(GenerationResult result, } // Initialize accumulated data for this choice index if not exists - AccumulatedData accumulated = accumulatedData.computeIfAbsent( - choiceIndex, k -> new AccumulatedData()); + AccumulatedData accumulated = + accumulatedData.computeIfAbsent(choiceIndex, k -> new AccumulatedData()); if (choice.getMessage() != null) { // Save role if present - if (choice.getMessage().getRole() != null && - !choice.getMessage().getRole().isEmpty()) { + if (choice.getMessage().getRole() != null && !choice.getMessage().getRole().isEmpty()) { accumulated.role = choice.getMessage().getRole(); } @@ -356,16 +354,17 @@ private GenerationResult mergeSingleResponse(GenerationResult result, } // Restore role if we have it - if (accumulated.role != null && - (choice.getMessage().getRole() == null || - choice.getMessage().getRole().isEmpty())) { + if (accumulated.role != null + && (choice.getMessage().getRole() == null + || choice.getMessage().getRole().isEmpty())) { choice.getMessage().setRole(accumulated.role); } } // Handle logprobs accumulation if (choice.getLogprobs() != null && choice.getLogprobs().getContent() != null) { - List currentLogprobsContent = choice.getLogprobs().getContent(); + List currentLogprobsContent = + choice.getLogprobs().getContent(); if (!currentLogprobsContent.isEmpty()) { accumulated.logprobsContent.addAll(currentLogprobsContent); } @@ -376,8 +375,7 @@ private GenerationResult mergeSingleResponse(GenerationResult result, } // Handle finish_reason for n > 1 case - if (n > 1 && choice.getFinishReason() != null && - !choice.getFinishReason().equals("null")) { + if (n > 1 && choice.getFinishReason() != null && !choice.getFinishReason().equals("null")) { accumulated.finishReason = choice.getFinishReason(); accumulated.finished = true; } @@ -385,9 +383,10 @@ private GenerationResult mergeSingleResponse(GenerationResult result, // Store output_tokens for each choice when n > 1 // Each streaming packet contains usage info for one specific choice - if (n > 1 && result.getUsage() != null && - result.getUsage().getOutputTokens() != null && - !choices.isEmpty()) { + if (n > 1 + && result.getUsage() != null + && result.getUsage().getOutputTokens() != null + && !choices.isEmpty()) { // Get the choice index from the first choice in this packet Integer choiceIndex = choices.get(0).getIndex(); if (choiceIndex == null) { @@ -414,11 +413,9 @@ private GenerationResult mergeSingleResponse(GenerationResult result, String currentFinishReason = null; Integer currentChoiceIndex = null; for (GenerationOutput.Choice choice : choices) { - if (choice.getFinishReason() != null && - !choice.getFinishReason().equals("null")) { + if (choice.getFinishReason() != null && !choice.getFinishReason().equals("null")) { currentFinishReason = choice.getFinishReason(); - currentChoiceIndex = - choice.getIndex() != null ? choice.getIndex() : 0; + currentChoiceIndex = choice.getIndex() != null ? choice.getIndex() : 0; break; } } @@ -433,8 +430,7 @@ private GenerationResult mergeSingleResponse(GenerationResult result, if (finishedCount < n) { // Hide finish_reason until all finished for (GenerationOutput.Choice choice : choices) { - if (choice.getFinishReason() != null && - !choice.getFinishReason().equals("null")) { + if (choice.getFinishReason() != null && !choice.getFinishReason().equals("null")) { choice.setFinishReason("null"); } } @@ -446,8 +442,7 @@ private GenerationResult mergeSingleResponse(GenerationResult result, GenerationOutput output = result.getOutput(); List allChoices = new ArrayList<>(); int totalOutputTokens = 0; - for (Map.Entry entry : - accumulatedData.entrySet()) { + for (Map.Entry entry : accumulatedData.entrySet()) { Integer index = entry.getKey(); AccumulatedData data = entry.getValue(); GenerationOutput.Choice finalChoice = output.new Choice(); @@ -480,8 +475,9 @@ private GenerationResult mergeSingleResponse(GenerationResult result, if (result.getUsage() != null && totalOutputTokens > 0) { result.getUsage().setOutputTokens(totalOutputTokens); if (result.getUsage().getInputTokens() != null) { - result.getUsage().setTotalTokens( - result.getUsage().getInputTokens() + totalOutputTokens); + result + .getUsage() + .setTotalTokens(result.getUsage().getInputTokens() + totalOutputTokens); } } } @@ -494,15 +490,15 @@ private GenerationResult mergeSingleResponse(GenerationResult result, currentData.allChoicesSent = true; // Reuse current choice in result, just update it for (GenerationOutput.Choice choice : choices) { - if (choice.getIndex() != null && - choice.getIndex().equals(currentChoiceIndex)) { + if (choice.getIndex() != null && choice.getIndex().equals(currentChoiceIndex)) { // Update usage with this choice's output tokens if (result.getUsage() != null && currentData.outputTokens != null) { result.getUsage().setOutputTokens(currentData.outputTokens); if (result.getUsage().getInputTokens() != null) { - result.getUsage().setTotalTokens( - result.getUsage().getInputTokens() + - currentData.outputTokens); + result + .getUsage() + .setTotalTokens( + result.getUsage().getInputTokens() + currentData.outputTokens); } } return result; @@ -529,10 +525,9 @@ private GenerationResult mergeSingleResponse(GenerationResult result, return result; } - /** - * Merges tool calls from current response with accumulated tool calls. - */ - private void mergeToolCalls(List currentToolCalls, List accumulatedToolCalls) { + /** Merges tool calls from current response with accumulated tool calls. */ + private void mergeToolCalls( + List currentToolCalls, List accumulatedToolCalls) { for (ToolCallBase currentCall : currentToolCalls) { if (currentCall == null || currentCall.getIndex() == null) { continue; @@ -543,15 +538,13 @@ private void mergeToolCalls(List currentToolCalls, List currentToolCalls, List currentToolCalls, List currentToolCalls, List bytes; + private List bytes; - private String token; - } + private String token; + } - @Data - public static class Content { - @SerializedName("top_logprobs") - private List topLogprobs; + @Data + public static class Content { + @SerializedName("top_logprobs") + private List topLogprobs; - private Double logprob; + private Double logprob; - private List bytes; + private List bytes; - private String token; - } + private String token; + } - private List content; + private List content; } - - diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationOutputTokenDetails.java b/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationOutputTokenDetails.java index 3bb1eae..5cb6c6f 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationOutputTokenDetails.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationOutputTokenDetails.java @@ -7,6 +7,6 @@ @Data @SuperBuilder public class GenerationOutputTokenDetails { - @SerializedName("reasoning_tokens") - private Integer reasoningTokens; + @SerializedName("reasoning_tokens") + private Integer reasoningTokens; } diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationParam.java b/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationParam.java index cf52cf5..b5bcc1c 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationParam.java @@ -198,9 +198,9 @@ public Map getParameters() { params.put("incremental_output", incrementalOutput); } } else { - if (Boolean.TRUE.equals(incrementalOutput)) { - params.put(ApiKeywords.INCREMENTAL_OUTPUT, incrementalOutput); - } + if (Boolean.TRUE.equals(incrementalOutput)) { + params.put(ApiKeywords.INCREMENTAL_OUTPUT, incrementalOutput); + } } if (repetitionPenalty != null) { diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationResult.java b/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationResult.java index 3b82e4e..1154671 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationResult.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/GenerationResult.java @@ -14,8 +14,10 @@ public final class GenerationResult { private String requestId; private GenerationUsage usage; private GenerationOutput output; + @SerializedName("status_code") private Integer statusCode; + private String code; private String message; diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/SearchOptions.java b/src/main/java/com/alibaba/dashscope/aigc/generation/SearchOptions.java index 45f039d..dd01101 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/SearchOptions.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/SearchOptions.java @@ -35,9 +35,8 @@ public class SearchOptions { private String searchStrategy; /** - * Whether the first data packet in streaming output contains only - * search source information. Only effective when enable_source is - * true and in streaming mode. Default is false. + * Whether the first data packet in streaming output contains only search source information. Only + * effective when enable_source is true and in streaming mode. Default is false. */ @SerializedName("prepend_search_result") @Builder.Default diff --git a/src/main/java/com/alibaba/dashscope/aigc/generation/TranslationOptions.java b/src/main/java/com/alibaba/dashscope/aigc/generation/TranslationOptions.java index e6c3527..0f2db08 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/generation/TranslationOptions.java +++ b/src/main/java/com/alibaba/dashscope/aigc/generation/TranslationOptions.java @@ -1,43 +1,42 @@ package com.alibaba.dashscope.aigc.generation; import com.google.gson.annotations.SerializedName; +import java.util.List; import lombok.Data; import lombok.experimental.SuperBuilder; -import java.util.List; - @SuperBuilder @Data public class TranslationOptions { - /** 源语言的英文全称 */ - @SerializedName("source_lang") - private String sourceLang; - - /** 源语言的英文全称 */ - @SerializedName("target_lang") - private String targetLang; - - /** 在使用领域提示功能时需要设置的领域提示语句 */ - private String domains; - - /** 在使用术语干预翻译功能时需要设置的术语数组 */ - private List terms; - - /** 在使用翻译记忆功能时需要设置的翻译记忆数组 */ - @SerializedName("tm_list") - private List tmList; - - @SuperBuilder - @Data - public static class Tm { - String source; - String target; - } - - @SuperBuilder - @Data - public static class Term { - String source; - String target; - } -} \ No newline at end of file + /** 源语言的英文全称 */ + @SerializedName("source_lang") + private String sourceLang; + + /** 源语言的英文全称 */ + @SerializedName("target_lang") + private String targetLang; + + /** 在使用领域提示功能时需要设置的领域提示语句 */ + private String domains; + + /** 在使用术语干预翻译功能时需要设置的术语数组 */ + private List terms; + + /** 在使用翻译记忆功能时需要设置的翻译记忆数组 */ + @SerializedName("tm_list") + private List tmList; + + @SuperBuilder + @Data + public static class Tm { + String source; + String target; + } + + @SuperBuilder + @Data + public static class Term { + String source; + String target; + } +} diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGeneration.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGeneration.java index 106cce3..6114f0d 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGeneration.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGeneration.java @@ -16,12 +16,11 @@ import com.alibaba.dashscope.utils.ParamUtils; import com.alibaba.dashscope.utils.PreprocessMessageInput; import io.reactivex.Flowable; -import lombok.extern.slf4j.Slf4j; - import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import lombok.extern.slf4j.Slf4j; @Slf4j public final class ImageGeneration { @@ -79,8 +78,7 @@ public ImageGeneration(String protocol, String baseUrl) { this.baseUrl = baseUrl; } - public ImageGeneration( - String protocol, String baseUrl, ConnectionOptions connectionOptions) { + public ImageGeneration(String protocol, String baseUrl, ConnectionOptions connectionOptions) { serviceOption = defaultSyncApiServiceOption(); serviceOption.setProtocol(Protocol.of(protocol)); if (Protocol.HTTP.getValue().equals(protocol)) { @@ -121,8 +119,7 @@ public ImageGenerationResult call(ImageGenerationParam param) * @throws ApiException The request failed, possibly due to a network or data error. * @throws UploadFileException File upload failed. */ - public void call( - ImageGenerationParam param, ResultCallback callback) + public void call(ImageGenerationParam param, ResultCallback callback) throws ApiException, NoApiKeyException, UploadFileException { serviceOption.setIsSSE(false); serviceOption.setStreamingMode(StreamingMode.NONE); @@ -157,7 +154,7 @@ public void onError(Exception e) { * @throws UploadFileException Fail upload failed. */ public ImageGenerationResult asyncCall(ImageGenerationParam param) - throws ApiException, NoApiKeyException, UploadFileException { + throws ApiException, NoApiKeyException, UploadFileException { preprocessInput(param); serviceOption.setTask(Task.IMAGE_GENERATION.getValue()); serviceOption.setIsAsyncTask(true); @@ -165,60 +162,59 @@ public ImageGenerationResult asyncCall(ImageGenerationParam param) } public ImageGenerationListResult list(AsyncTaskListParam param) - throws ApiException, NoApiKeyException { + throws ApiException, NoApiKeyException { return ImageGenerationListResult.fromDashScopeResult(asyncApi.list(param, baseUrl)); } public ImageGenerationListResult list( - String startTime, - String endTime, - String modelName, - String apiKeyId, - String region, - String status, - Integer pageNo, - Integer pageSize) - throws ApiException, NoApiKeyException { + String startTime, + String endTime, + String modelName, + String apiKeyId, + String region, + String status, + Integer pageNo, + Integer pageSize) + throws ApiException, NoApiKeyException { return ImageGenerationListResult.fromDashScopeResult( - asyncApi.list( - startTime, endTime, modelName, apiKeyId, region, status, pageNo, pageSize, baseUrl)); + asyncApi.list( + startTime, endTime, modelName, apiKeyId, region, status, pageNo, pageSize, baseUrl)); } public ImageGenerationResult fetch(String taskId, String apiKey) - throws ApiException, NoApiKeyException { + throws ApiException, NoApiKeyException { return ImageGenerationResult.fromDashScopeResult(asyncApi.fetch(taskId, apiKey, baseUrl)); } public ImageGenerationResult fetch(ImageGenerationResult taskInfo, String apiKey) - throws ApiException, NoApiKeyException { + throws ApiException, NoApiKeyException { return ImageGenerationResult.fromDashScopeResult( - asyncApi.fetch(taskInfo.getOutput().getTaskId(), apiKey, baseUrl)); + asyncApi.fetch(taskInfo.getOutput().getTaskId(), apiKey, baseUrl)); } public ImageGenerationResult cancel(String taskId, String apiKey) - throws ApiException, NoApiKeyException { + throws ApiException, NoApiKeyException { return ImageGenerationResult.fromDashScopeResult(asyncApi.cancel(taskId, apiKey, baseUrl)); } public ImageGenerationResult cancel(ImageGenerationResult taskInfo, String apiKey) - throws ApiException, NoApiKeyException { + throws ApiException, NoApiKeyException { DashScopeResult res = asyncApi.cancel(taskInfo.getOutput().getTaskId(), apiKey, baseUrl); return ImageGenerationResult.fromDashScopeResult(res); } public ImageGenerationResult wait(String taskId, String apiKey) - throws ApiException, NoApiKeyException { + throws ApiException, NoApiKeyException { return ImageGenerationResult.fromDashScopeResult(asyncApi.wait(taskId, apiKey, baseUrl)); } public ImageGenerationResult wait(ImageGenerationResult taskInfo, String apiKey) - throws ApiException, NoApiKeyException { + throws ApiException, NoApiKeyException { return ImageGenerationResult.fromDashScopeResult( - asyncApi.wait(taskInfo.getOutput().getTaskId(), apiKey, baseUrl)); + asyncApi.wait(taskInfo.getOutput().getTaskId(), apiKey, baseUrl)); } - /** * Call the server to get the result by stream. * @@ -245,16 +241,18 @@ public Flowable streamCall(ImageGenerationParam param) .streamCall(param) .map(ImageGenerationResult::fromDashScopeResult) .map(result -> mergeSingleResponse(result, toMergeResponse)) - .doOnComplete(() -> { - if (toMergeResponse) { - clearAccumulatedData(); - } - }) - .doOnError(throwable -> { - if (toMergeResponse) { - clearAccumulatedData(); - } - }); + .doOnComplete( + () -> { + if (toMergeResponse) { + clearAccumulatedData(); + } + }) + .doOnError( + throwable -> { + if (toMergeResponse) { + clearAccumulatedData(); + } + }); } /** @@ -267,8 +265,7 @@ public Flowable streamCall(ImageGenerationParam param) * @throws InputRequiredException The input field is missing. * @throws UploadFileException File upload failed. */ - public void streamCall( - ImageGenerationParam param, ResultCallback callback) + public void streamCall(ImageGenerationParam param, ResultCallback callback) throws ApiException, NoApiKeyException, InputRequiredException, UploadFileException { param.validate(); @@ -319,9 +316,9 @@ private void preprocessInput(ImageGenerationParam param) for (ImageGenerationMessage msg : param.getMessages()) { boolean isUpload; PreprocessMessageInput.PreprocessResult result = - PreprocessMessageInput.preProcessMultiModalMessageInputs( - param.getModel(), msg, - param.getApiKey(), certificate); + PreprocessMessageInput.preProcessMultiModalMessageInputs( + param.getModel(), msg, + param.getApiKey(), certificate); isUpload = result.hasUpload(); certificate = result.getCertificate(); if (isUpload && !hasUpload) { @@ -334,17 +331,17 @@ private void preprocessInput(ImageGenerationParam param) } /** - * Modifies the parameters for internal streaming optimization. - * If incrementalOutput is false, modifies the ImageGenerationParam object to set - * incrementalOutput to true for internal streaming optimization. + * Modifies the parameters for internal streaming optimization. If incrementalOutput is false, + * modifies the ImageGenerationParam object to set incrementalOutput to true for internal + * streaming optimization. * * @param param The parameter object to modify * @return true if the parameter was modified, false otherwise */ private boolean modifyIncrementalOutput(ImageGenerationParam param) { Boolean incrementalOutput = param.getIncrementalOutput(); - if (ParamUtils.shouldModifyIncrementalOutput(param.getModel()) && - Boolean.FALSE.equals(incrementalOutput)) { + if (ParamUtils.shouldModifyIncrementalOutput(param.getModel()) + && Boolean.FALSE.equals(incrementalOutput)) { // Modify the ImageGenerationParam object to enable incremental output param.setIncrementalOutput(true); return true; @@ -353,14 +350,15 @@ private boolean modifyIncrementalOutput(ImageGenerationParam param) { } /** - * Merges a single ImageGenerationResult with accumulated data for non-incremental output simulation. - * This method accumulates text content and tool_calls from streaming responses. + * Merges a single ImageGenerationResult with accumulated data for non-incremental output + * simulation. This method accumulates text content and tool_calls from streaming responses. * * @param result The ImageGenerationResult to merge * @param toMergeResponse Whether to perform merging (based on original incrementalOutput setting) * @return The merged ImageGenerationResult */ - private ImageGenerationResult mergeSingleResponse(ImageGenerationResult result, boolean toMergeResponse) { + private ImageGenerationResult mergeSingleResponse( + ImageGenerationResult result, boolean toMergeResponse) { if (!toMergeResponse || result == null || result.getOutput() == null) { return result; } @@ -374,8 +372,8 @@ private ImageGenerationResult mergeSingleResponse(ImageGenerationResult result, ImageGenerationOutput.Choice choice = choices.get(choiceIdx); // Initialize accumulated data for this choice if not exists - AccumulatedData accumulated = accumulatedData.computeIfAbsent( - choiceIdx, k -> new AccumulatedData()); + AccumulatedData accumulated = + accumulatedData.computeIfAbsent(choiceIdx, k -> new AccumulatedData()); if (choice.getMessage() != null) { // Handle content accumulation (text content in content list) @@ -395,10 +393,11 @@ private ImageGenerationResult mergeSingleResponse(ImageGenerationResult result, } /** - * Merges text content from current response with accumulated content. - * For MultiModal, content is a List> where text content is in maps with "text" key. + * Merges text content from current response with accumulated content. For MultiModal, content is + * a List> where text content is in maps with "text" key. */ - private void mergeTextContent(List> currentContent, AccumulatedData accumulated) { + private void mergeTextContent( + List> currentContent, AccumulatedData accumulated) { for (Map contentItem : currentContent) { if (contentItem.containsKey("text")) { String textValue = (String) contentItem.get("text"); @@ -430,10 +429,9 @@ private void mergeTextContent(List> currentContent, Accumula } } - /** - * Merges tool calls from current response with accumulated tool calls. - */ - private void mergeToolCalls(List currentToolCalls, List accumulatedToolCalls) { + /** Merges tool calls from current response with accumulated tool calls. */ + private void mergeToolCalls( + List currentToolCalls, List accumulatedToolCalls) { for (ToolCallBase currentCall : currentToolCalls) { if (currentCall == null || currentCall.getIndex() == null) { continue; @@ -444,15 +442,13 @@ private void mergeToolCalls(List currentToolCalls, List currentToolCalls, List currentToolCalls, List currentToolCalls, List> content = new ArrayList<>(); } diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationListResult.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationListResult.java index d30a490..0c5ff9f 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationListResult.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationListResult.java @@ -5,11 +5,10 @@ import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; import com.google.gson.annotations.SerializedName; +import java.util.List; import lombok.Data; import lombok.extern.slf4j.Slf4j; -import java.util.List; - @Data @Slf4j public class ImageGenerationListResult { diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationMessage.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationMessage.java index 9840b33..ecf5a47 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationMessage.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationMessage.java @@ -1,20 +1,19 @@ package com.alibaba.dashscope.aigc.imagegeneration; +import java.util.List; +import java.util.Map; import lombok.Data; import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; -import java.util.List; -import java.util.Map; - @Data @SuperBuilder @NoArgsConstructor public class ImageGenerationMessage { - /** The role, can be `user` and `bot`. */ - private String role; + /** The role, can be `user` and `bot`. */ + private String role; - /** The conversation content. */ - private List> content; -} \ No newline at end of file + /** The conversation content. */ + private List> content; +} diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationOutput.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationOutput.java index bc045ca..200dd5e 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationOutput.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationOutput.java @@ -2,12 +2,11 @@ import com.alibaba.dashscope.aigc.multimodalconversation.AudioResult; import com.google.gson.annotations.SerializedName; +import java.util.List; import lombok.Data; import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; -import java.util.List; - @Data @SuperBuilder @NoArgsConstructor diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java index 804832b..b727c85 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java @@ -7,13 +7,12 @@ import com.alibaba.dashscope.utils.JsonUtils; import com.alibaba.dashscope.utils.ParamUtils; import com.google.gson.JsonObject; -import lombok.*; -import lombok.experimental.SuperBuilder; - import java.nio.ByteBuffer; import java.util.HashMap; import java.util.List; import java.util.Map; +import lombok.*; +import lombok.experimental.SuperBuilder; @EqualsAndHashCode(callSuper = true) @Data diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesis.java b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesis.java index 22aba38..68256bb 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesis.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesis.java @@ -115,33 +115,31 @@ public ImageSynthesisResult asyncCall(ImageSynthesisParam param) // add local file support try { param.checkAndUpload(); - }catch (UploadFileException e){ + } catch (UploadFileException e) { throw new ApiException(e); } ApiServiceOption serviceOption = createServiceOptions; if (param.getModel().contains("imageedit") || param.getModel().contains("wan2.5-i2i")) { serviceOption.setTask("image2image"); } - return ImageSynthesisResult.fromDashScopeResult( - asyncApi.asyncCall(param, serviceOption)); + return ImageSynthesisResult.fromDashScopeResult(asyncApi.asyncCall(param, serviceOption)); } /** - * Note: This method currently now only supports wan2.2-t2i-flash and wan2.2-t2i-plus. - * Using other models will result in an error,More raw image models may be added for use later + * Note: This method currently now only supports wan2.2-t2i-flash and wan2.2-t2i-plus. Using other + * models will result in an error,More raw image models may be added for use later */ public ImageSynthesisResult syncCall(ImageSynthesisParam param) - throws ApiException, NoApiKeyException { + throws ApiException, NoApiKeyException { // add local file support try { param.checkAndUpload(); - }catch (UploadFileException e){ + } catch (UploadFileException e) { throw new ApiException(e); } ApiServiceOption serviceOption = createServiceOptions; serviceOption.setIsAsyncTask(false); - return ImageSynthesisResult.fromDashScopeResult( - syncApi.call(param, serviceOption)); + return ImageSynthesisResult.fromDashScopeResult(syncApi.call(param, serviceOption)); } /** @@ -157,15 +155,14 @@ public ImageSynthesisResult call(ImageSynthesisParam param) // add local file support try { param.checkAndUpload(); - }catch (UploadFileException e){ + } catch (UploadFileException e) { throw new ApiException(e); } ApiServiceOption serviceOption = createServiceOptions; if (param.getModel().contains("imageedit") || param.getModel().contains("wan2.5-i2i")) { serviceOption.setTask("image2image"); } - return ImageSynthesisResult.fromDashScopeResult( - asyncApi.call(param, serviceOption)); + return ImageSynthesisResult.fromDashScopeResult(asyncApi.call(param, serviceOption)); } /** diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java index 09b8e5d..66ac245 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagesynthesis/ImageSynthesisParam.java @@ -45,8 +45,10 @@ public class ImageSynthesisParam extends HalfDuplexServiceParam { /** Enter the URL address of the target edited image. */ @Builder.Default private String baseImageUrl = null; - /** Provide the URL address of the image of the marked area by the user. - * It should be consistent with the image resolution of the base_image_url. */ + /** + * Provide the URL address of the image of the marked area by the user. It should be consistent + * with the image resolution of the base_image_url. + */ @Builder.Default private String maskImageUrl = null; /** The extra parameters. */ @@ -161,7 +163,8 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { } } - boolean isUpload = PreprocessInputImage.checkAndUploadImage(getModel(), inputChecks, getApiKey()); + boolean isUpload = + PreprocessInputImage.checkAndUploadImage(getModel(), inputChecks, getApiKey()); if (isUpload) { this.putHeader("X-DashScope-OssResourceResolve", "enable"); diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java index 226912e..d77aaed 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java @@ -185,7 +185,6 @@ public enum Voice { @SerializedName("Vivian") VIVIAN("Vivian"); - private final String value; Voice(String value) { diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversation.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversation.java index 5e22664..f915187 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversation.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversation.java @@ -8,17 +8,17 @@ import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.exception.UploadFileException; import com.alibaba.dashscope.protocol.*; +import com.alibaba.dashscope.tools.ToolCallBase; +import com.alibaba.dashscope.tools.ToolCallFunction; import com.alibaba.dashscope.utils.OSSUploadCertificate; import com.alibaba.dashscope.utils.ParamUtils; import com.alibaba.dashscope.utils.PreprocessMessageInput; import io.reactivex.Flowable; -import lombok.extern.slf4j.Slf4j; +import java.util.ArrayList; import java.util.HashMap; -import java.util.Map; import java.util.List; -import java.util.ArrayList; -import com.alibaba.dashscope.tools.ToolCallBase; -import com.alibaba.dashscope.tools.ToolCallFunction; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; @Slf4j public final class MultiModalConversation { @@ -157,16 +157,18 @@ public Flowable streamCall(MultiModalConversationP .streamCall(param) .map(MultiModalConversationResult::fromDashScopeResult) .map(result -> mergeSingleResponse(result, toMergeResponse)) - .doOnComplete(() -> { - if (toMergeResponse) { - clearAccumulatedData(); - } - }) - .doOnError(throwable -> { - if (toMergeResponse) { - clearAccumulatedData(); - } - }); + .doOnComplete( + () -> { + if (toMergeResponse) { + clearAccumulatedData(); + } + }) + .doOnError( + throwable -> { + if (toMergeResponse) { + clearAccumulatedData(); + } + }); } /** @@ -200,8 +202,10 @@ public void streamCall( new ResultCallback() { @Override public void onEvent(DashScopeResult msg) { - MultiModalConversationResult result = MultiModalConversationResult.fromDashScopeResult(msg); - MultiModalConversationResult mergedResult = mergeSingleResponse(result, toMergeResponse); + MultiModalConversationResult result = + MultiModalConversationResult.fromDashScopeResult(msg); + MultiModalConversationResult mergedResult = + mergeSingleResponse(result, toMergeResponse); callback.onEvent(mergedResult); } @@ -242,8 +246,7 @@ private void preprocessInput(MultiModalConversationParam param) } else { PreprocessMessageInput.PreprocessResult result = PreprocessMessageInput.preProcessMultiModalMessageInputs( - param.getModel(), (MultiModalMessage) msg, - param.getApiKey(), certificate); + param.getModel(), (MultiModalMessage) msg, param.getApiKey(), certificate); isUpload = result.hasUpload(); certificate = result.getCertificate(); } @@ -257,17 +260,17 @@ private void preprocessInput(MultiModalConversationParam param) } /** - * Modifies the parameters for internal streaming optimization. - * If incrementalOutput is false, modifies the MultiModalConversationParam object to set - * incrementalOutput to true for internal streaming optimization. + * Modifies the parameters for internal streaming optimization. If incrementalOutput is false, + * modifies the MultiModalConversationParam object to set incrementalOutput to true for internal + * streaming optimization. * * @param param The parameter object to modify * @return true if the parameter was modified, false otherwise */ private boolean modifyIncrementalOutput(MultiModalConversationParam param) { Boolean incrementalOutput = param.getIncrementalOutput(); - if (ParamUtils.shouldModifyIncrementalOutput(param.getModel()) && - Boolean.FALSE.equals(incrementalOutput)) { + if (ParamUtils.shouldModifyIncrementalOutput(param.getModel()) + && Boolean.FALSE.equals(incrementalOutput)) { // Modify the MultiModalConversationParam object to enable incremental output param.setIncrementalOutput(true); return true; @@ -276,14 +279,15 @@ private boolean modifyIncrementalOutput(MultiModalConversationParam param) { } /** - * Merges a single MultiModalConversationResult with accumulated data for non-incremental output simulation. - * This method accumulates text content and tool_calls from streaming responses. + * Merges a single MultiModalConversationResult with accumulated data for non-incremental output + * simulation. This method accumulates text content and tool_calls from streaming responses. * * @param result The MultiModalConversationResult to merge * @param toMergeResponse Whether to perform merging (based on original incrementalOutput setting) * @return The merged MultiModalConversationResult */ - private MultiModalConversationResult mergeSingleResponse(MultiModalConversationResult result, boolean toMergeResponse) { + private MultiModalConversationResult mergeSingleResponse( + MultiModalConversationResult result, boolean toMergeResponse) { if (!toMergeResponse || result == null || result.getOutput() == null) { return result; } @@ -297,8 +301,8 @@ private MultiModalConversationResult mergeSingleResponse(MultiModalConversationR MultiModalConversationOutput.Choice choice = choices.get(choiceIdx); // Initialize accumulated data for this choice if not exists - AccumulatedData accumulated = accumulatedData.computeIfAbsent( - choiceIdx, k -> new AccumulatedData()); + AccumulatedData accumulated = + accumulatedData.computeIfAbsent(choiceIdx, k -> new AccumulatedData()); if (choice.getMessage() != null) { // Handle content accumulation (text content in content list) @@ -338,10 +342,11 @@ private MultiModalConversationResult mergeSingleResponse(MultiModalConversationR } /** - * Merges text content from current response with accumulated content. - * For MultiModal, content is a List> where text content is in maps with "text" key. + * Merges text content from current response with accumulated content. For MultiModal, content is + * a List> where text content is in maps with "text" key. */ - private void mergeTextContent(List> currentContent, AccumulatedData accumulated) { + private void mergeTextContent( + List> currentContent, AccumulatedData accumulated) { for (Map contentItem : currentContent) { if (contentItem.containsKey("text")) { String textValue = (String) contentItem.get("text"); @@ -373,10 +378,9 @@ private void mergeTextContent(List> currentContent, Accumula } } - /** - * Merges tool calls from current response with accumulated tool calls. - */ - private void mergeToolCalls(List currentToolCalls, List accumulatedToolCalls) { + /** Merges tool calls from current response with accumulated tool calls. */ + private void mergeToolCalls( + List currentToolCalls, List accumulatedToolCalls) { for (ToolCallBase currentCall : currentToolCalls) { if (currentCall == null || currentCall.getIndex() == null) { continue; @@ -387,15 +391,13 @@ private void mergeToolCalls(List currentToolCalls, List currentToolCalls, List currentToolCalls, List currentToolCalls, List> content = new ArrayList<>(); List toolCalls = new ArrayList<>(); diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java index 1dbc01e..5749a63 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java @@ -83,14 +83,14 @@ private ApiServiceOption getApiServiceOption() { */ private ApiServiceOption getApiServiceOption(String task) { return ApiServiceOption.builder() - .protocol(Protocol.HTTP) - .httpMethod(HttpMethod.POST) - .streamingMode(StreamingMode.NONE) - .taskGroup(taskGroup) - .task(task) - .function(function) - .isAsyncTask(true) - .build(); + .protocol(Protocol.HTTP) + .httpMethod(HttpMethod.POST) + .streamingMode(StreamingMode.NONE) + .taskGroup(taskGroup) + .task(task) + .function(function) + .isAsyncTask(true) + .build(); } /** default VideoSynthesis constructor */ @@ -135,20 +135,19 @@ public VideoSynthesis(String baseUrl, String task) { * @throws InputRequiredException Check the input param. */ public VideoSynthesisResult asyncCall(VideoSynthesisParam param) - throws ApiException, NoApiKeyException, InputRequiredException { + throws ApiException, NoApiKeyException, InputRequiredException { param.validate(); // add local file support try { param.checkAndUpload(); - }catch (UploadFileException e){ + } catch (UploadFileException e) { throw new InputRequiredException(e.getMessage()); } ApiServiceOption serviceOption = createServiceOptions; if (param.getModel().contains("kf2v")) { serviceOption.setTask("image2video"); } - return VideoSynthesisResult.fromDashScopeResult( - asyncApi.asyncCall(param, serviceOption)); + return VideoSynthesisResult.fromDashScopeResult(asyncApi.asyncCall(param, serviceOption)); } /** @@ -161,20 +160,19 @@ public VideoSynthesisResult asyncCall(VideoSynthesisParam param) * @throws InputRequiredException Check the input param. */ public VideoSynthesisResult call(VideoSynthesisParam param) - throws ApiException, NoApiKeyException, InputRequiredException { + throws ApiException, NoApiKeyException, InputRequiredException { param.validate(); // add local file support try { param.checkAndUpload(); - }catch (UploadFileException e){ + } catch (UploadFileException e) { throw new InputRequiredException(e.getMessage()); } ApiServiceOption serviceOption = createServiceOptions; if (param.getModel().contains("kf2v")) { serviceOption.setTask("image2video"); } - return VideoSynthesisResult.fromDashScopeResult( - asyncApi.call(param, serviceOption)); + return VideoSynthesisResult.fromDashScopeResult(asyncApi.call(param, serviceOption)); } /** diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java index ef15d30..146bc10 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java @@ -1,6 +1,8 @@ // Copyright (c) Alibaba, Inc. and its affiliates. package com.alibaba.dashscope.aigc.videosynthesis; +import static com.alibaba.dashscope.utils.ApiKeywords.*; + import com.alibaba.dashscope.base.HalfDuplexServiceParam; import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.exception.NoApiKeyException; @@ -9,19 +11,16 @@ import com.alibaba.dashscope.utils.JsonUtils; import com.alibaba.dashscope.utils.PreprocessInputImage; import com.google.gson.JsonObject; -import lombok.Builder; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.Singular; -import lombok.experimental.SuperBuilder; - import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; - -import static com.alibaba.dashscope.utils.ApiKeywords.*; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.Singular; +import lombok.experimental.SuperBuilder; @EqualsAndHashCode(callSuper = true) @Data @@ -39,8 +38,7 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { @Builder.Default private String prompt = null; /** The negative prompt is the opposite of the prompt meaning. use negativePrompt */ - @Deprecated - @Builder.Default private String negative_prompt = null; + @Deprecated @Builder.Default private String negative_prompt = null; /** The negative prompt is the opposite of the prompt meaning. */ @Builder.Default private String negativePrompt = null; @@ -49,8 +47,7 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { @Builder.Default private String template = null; /** use promptExtend in parameters */ - @Deprecated - @Builder.Default private Boolean extendPrompt = Boolean.TRUE; + @Deprecated @Builder.Default private Boolean extendPrompt = Boolean.TRUE; /** The input image url, Generate the URL of the image referenced by the video */ @Builder.Default private String imgUrl = null; @@ -58,11 +55,14 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { /** The input audio url. */ @Builder.Default private String audioUrl = null; - /** list of character reference video file urls uploaded by the user */ + /** list of character reference video file urls uploaded by the user */ @Builder.Default private List referenceVideoUrls = null; - /** For the description information of the picture and sound of the reference video, corresponding to ref video, - * it needs to be in the order of the url. If the quantity is different, an error will be reported */ + /** + * For the description information of the picture and sound of the reference video, corresponding + * to ref video, it needs to be in the order of the url. If the quantity is different, an error + * will be reported + */ @Builder.Default private List referenceVideoDescription = null; /** The extra parameters. */ @@ -238,7 +238,8 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { } } - boolean isUpload = PreprocessInputImage.checkAndUploadImage(getModel(), inputChecks, getApiKey()); + boolean isUpload = + PreprocessInputImage.checkAndUploadImage(getModel(), inputChecks, getApiKey()); if (isUpload) { this.putHeader("X-DashScope-OssResourceResolve", "enable"); @@ -258,5 +259,4 @@ public void checkAndUpload() throws NoApiKeyException, UploadFileException { } } } - } diff --git a/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java b/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java index f579796..143d073 100644 --- a/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java +++ b/src/main/java/com/alibaba/dashscope/app/ApplicationParam.java @@ -107,49 +107,31 @@ public class ApplicationParam extends HalfDuplexParamBase { /** rag options */ private RagOptions ragOptions; - /** - * mcp server list - */ + /** mcp server list */ private List mcpServers; - /** - * enable web search - */ + /** enable web search */ private Boolean enableWebSearch; - /** - * enable system time - */ + /** enable system time */ private Boolean enableSystemTime; - /** - * enable prem model calling - */ + /** enable prem model calling */ private Boolean enablePremium; - /** - * dialog round number - */ + /** dialog round number */ private Integer dialogRound; - /** - * model ID - */ + /** model ID */ private String modelId; - /** - * stream mode for flow agent - */ + /** stream mode for flow agent */ private FlowStreamMode flowStreamMode; - /** - * enable thinking mode - */ + /** enable thinking mode */ private Boolean enableThinking; - /** - * CIP service codes for content security check - */ + /** CIP service codes for content security check */ private CipServiceCodes cipServiceCodes; @Override diff --git a/src/main/java/com/alibaba/dashscope/app/CipServiceCodes.java b/src/main/java/com/alibaba/dashscope/app/CipServiceCodes.java index e2cebff..6e29c33 100644 --- a/src/main/java/com/alibaba/dashscope/app/CipServiceCodes.java +++ b/src/main/java/com/alibaba/dashscope/app/CipServiceCodes.java @@ -18,9 +18,7 @@ public class CipServiceCodes { /** Image security check configuration */ private Image image; - /** - * Text security check configuration. - */ + /** Text security check configuration. */ @Data @Builder public static class Text { @@ -31,9 +29,7 @@ public static class Text { private String output; } - /** - * Image security check configuration. - */ + /** Image security check configuration. */ @Data @Builder public static class Image { diff --git a/src/main/java/com/alibaba/dashscope/app/FlowStreamMode.java b/src/main/java/com/alibaba/dashscope/app/FlowStreamMode.java index edb804e..affcb8d 100644 --- a/src/main/java/com/alibaba/dashscope/app/FlowStreamMode.java +++ b/src/main/java/com/alibaba/dashscope/app/FlowStreamMode.java @@ -3,28 +3,22 @@ public enum FlowStreamMode { - /** - * The streaming results from all nodes will be output in the thoughts field. - */ - FULL_THOUGHTS("full_thoughts"), + /** The streaming results from all nodes will be output in the thoughts field. */ + FULL_THOUGHTS("full_thoughts"), - /** - * Use the same output pattern as the agent application. - */ - AGENT_FORMAT("agent_format"), + /** Use the same output pattern as the agent application. */ + AGENT_FORMAT("agent_format"), - /** - * Use the output node and end node to perform the output. - */ - MESSAGE_FORMAT("message_format"); + /** Use the output node and end node to perform the output. */ + MESSAGE_FORMAT("message_format"); - private final String value; + private final String value; - private FlowStreamMode(String value) { - this.value = value; - } + private FlowStreamMode(String value) { + this.value = value; + } - public String getValue() { - return value; - } + public String getValue() { + return value; + } } diff --git a/src/main/java/com/alibaba/dashscope/app/WorkflowMessage.java b/src/main/java/com/alibaba/dashscope/app/WorkflowMessage.java index 5420d7d..fe69fa4 100644 --- a/src/main/java/com/alibaba/dashscope/app/WorkflowMessage.java +++ b/src/main/java/com/alibaba/dashscope/app/WorkflowMessage.java @@ -6,33 +6,33 @@ @Data public class WorkflowMessage { - @SerializedName("node_id") - private String nodeId; + @SerializedName("node_id") + private String nodeId; - @SerializedName("node_name") - private String nodeName; + @SerializedName("node_name") + private String nodeName; - @SerializedName("node_type") - private String nodeType; + @SerializedName("node_type") + private String nodeType; - @SerializedName("node_status") - private String nodeStatus; + @SerializedName("node_status") + private String nodeStatus; - @SerializedName("node_is_completed") - private Boolean nodeIsCompleted; + @SerializedName("node_is_completed") + private Boolean nodeIsCompleted; - @SerializedName("node_msg_seq_id") - private Integer nodeMsgSeqId; + @SerializedName("node_msg_seq_id") + private Integer nodeMsgSeqId; - @SerializedName("message") - private Message message; + @SerializedName("message") + private Message message; - @Data - public static class Message { - @SerializedName("role") - private String role; + @Data + public static class Message { + @SerializedName("role") + private String role; - @SerializedName("content") - private String content; - } + @SerializedName("content") + private String content; + } } diff --git a/src/main/java/com/alibaba/dashscope/assistants/Assistant.java b/src/main/java/com/alibaba/dashscope/assistants/Assistant.java index 85e9731..0ff9e56 100644 --- a/src/main/java/com/alibaba/dashscope/assistants/Assistant.java +++ b/src/main/java/com/alibaba/dashscope/assistants/Assistant.java @@ -69,15 +69,11 @@ public final class Assistant extends FlattenResultBase { @SerializedName("tools") private List tools; - /** - * Top P - */ + /** Top P */ @SerializedName("top_p") private Float topP; - /** - * Top K - */ + /** Top K */ @SerializedName("top_k") private Integer topK; @@ -89,9 +85,7 @@ public final class Assistant extends FlattenResultBase { @SerializedName("temperature") private Float temperature; - /** - * Max Tokens - */ + /** Max Tokens */ @SerializedName("max_tokens") private Integer maxTokens; } diff --git a/src/main/java/com/alibaba/dashscope/assistants/AssistantParam.java b/src/main/java/com/alibaba/dashscope/assistants/AssistantParam.java index 21474c1..7fa356a 100644 --- a/src/main/java/com/alibaba/dashscope/assistants/AssistantParam.java +++ b/src/main/java/com/alibaba/dashscope/assistants/AssistantParam.java @@ -42,15 +42,11 @@ public class AssistantParam extends FlattenHalfDuplexParamBase { @Default private Map metadata = null; - /** - * Top P - */ + /** Top P */ @SerializedName("top_p") private Float topP; - /** - * Top K - */ + /** Top K */ @SerializedName("top_k") private Integer topK; @@ -62,9 +58,7 @@ public class AssistantParam extends FlattenHalfDuplexParamBase { @SerializedName("temperature") private Float temperature; - /** - * Max Tokens - */ + /** Max Tokens */ @SerializedName("max_tokens") private Integer maxTokens; diff --git a/src/main/java/com/alibaba/dashscope/assistants/Assistants.java b/src/main/java/com/alibaba/dashscope/assistants/Assistants.java index 771545a..4aec7a5 100644 --- a/src/main/java/com/alibaba/dashscope/assistants/Assistants.java +++ b/src/main/java/com/alibaba/dashscope/assistants/Assistants.java @@ -25,7 +25,7 @@ public final class Assistants { private final GeneralApi api; private final GeneralServiceOption serviceOption; - + // Connection pre-warming mechanism private static final AtomicBoolean connectionPreWarmed = new AtomicBoolean(false); @@ -172,24 +172,25 @@ public AssistantFile retrieveFile( } /** - * Pre-warm the HTTP connection to reduce latency for first API call. - * Uses a lightweight list request to establish connection pool. + * Pre-warm the HTTP connection to reduce latency for first API call. Uses a lightweight list + * request to establish connection pool. */ private void preWarmConnection() { if (connectionPreWarmed.compareAndSet(false, true)) { try { // Lightweight GET request to establish connection - GeneralServiceOption warmupOption = GeneralServiceOption.builder() - .protocol(Protocol.HTTP) - .httpMethod(HttpMethod.GET) - .streamingMode(StreamingMode.OUT) - .path("assistants") - .build(); - + GeneralServiceOption warmupOption = + GeneralServiceOption.builder() + .protocol(Protocol.HTTP) + .httpMethod(HttpMethod.GET) + .streamingMode(StreamingMode.OUT) + .path("assistants") + .build(); + if (serviceOption.getBaseHttpUrl() != null) { warmupOption.setBaseHttpUrl(serviceOption.getBaseHttpUrl()); } - + api.get(GeneralListParam.builder().limit(1L).build(), warmupOption); } catch (Exception e) { // Reset flag to allow retry if pre-warming failed diff --git a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java index fa582b5..c09cb95 100644 --- a/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java +++ b/src/main/java/com/alibaba/dashscope/audio/asr/recognition/Recognition.java @@ -139,8 +139,10 @@ public Flowable streamCall( if (lastRequestId.get() == null && result.getRequestId() != null) { lastRequestId.set(result.getRequestId()); } - if (firstPackageTimeStamp < 0 && result.getSentence()!=null - && result.getSentence().getText() != null && !result.getSentence().getText().isEmpty()) { + if (firstPackageTimeStamp < 0 + && result.getSentence() != null + && result.getSentence().getText() != null + && !result.getSentence().getText().isEmpty()) { firstPackageTimeStamp = System.currentTimeMillis(); log.debug("first package delay: " + getFirstPackageDelay()); } @@ -210,9 +212,10 @@ public void onEvent(DashScopeResult message) { lastRequestId.set(recognitionResult.getRequestId()); } if (!recognitionResult.isCompleteResult()) { - if (firstPackageTimeStamp < 0 && recognitionResult.getSentence()!=null - && recognitionResult.getSentence().getText() != null - && !recognitionResult.getSentence().getText().isEmpty()) { + if (firstPackageTimeStamp < 0 + && recognitionResult.getSentence() != null + && recognitionResult.getSentence().getText() != null + && !recognitionResult.getSentence().getText().isEmpty()) { firstPackageTimeStamp = System.currentTimeMillis(); log.debug("first package delay: " + getFirstPackageDelay()); } @@ -323,9 +326,10 @@ public String call(RecognitionParam param, File file) { if (lastRequestId.get() == null && recognitionResult.getRequestId() != null) { lastRequestId.set(recognitionResult.getRequestId()); } - if (!recognitionResult.isCompleteResult() && recognitionResult.getSentence()!=null - && recognitionResult.getSentence().getText() != null - && !recognitionResult.getSentence().getText().isEmpty()) { + if (!recognitionResult.isCompleteResult() + && recognitionResult.getSentence() != null + && recognitionResult.getSentence().getText() != null + && !recognitionResult.getSentence().getText().isEmpty()) { if (firstPackageTimeStamp < 0) { firstPackageTimeStamp = System.currentTimeMillis(); log.debug("first package delay: " + getFirstPackageDelay()); diff --git a/src/main/java/com/alibaba/dashscope/audio/asr/vocabulary/VocabularyService.java b/src/main/java/com/alibaba/dashscope/audio/asr/vocabulary/VocabularyService.java index 83c456b..4dbc68c 100644 --- a/src/main/java/com/alibaba/dashscope/audio/asr/vocabulary/VocabularyService.java +++ b/src/main/java/com/alibaba/dashscope/audio/asr/vocabulary/VocabularyService.java @@ -77,8 +77,7 @@ public Vocabulary createVocabulary(String targetModel, String prefix, JsonArray */ public Vocabulary[] listVocabulary(String prefix) throws NoApiKeyException, InputRequiredException { - return listVocabulary( - prefix, 0, 10, VocabularyParam.builder().model(this.model).build()); + return listVocabulary(prefix, 0, 10, VocabularyParam.builder().model(this.model).build()); } /** @@ -94,10 +93,7 @@ public Vocabulary[] listVocabulary(String prefix) public Vocabulary[] listVocabulary(String prefix, int pageIndex, int pageSize) throws NoApiKeyException, InputRequiredException { return listVocabulary( - prefix, - pageIndex, - pageSize, - VocabularyParam.builder().model(this.model).build()); + prefix, pageIndex, pageSize, VocabularyParam.builder().model(this.model).build()); } /** @@ -110,8 +106,7 @@ public Vocabulary[] listVocabulary(String prefix, int pageIndex, int pageSize) */ public Vocabulary queryVocabulary(String vocabularyId) throws NoApiKeyException, InputRequiredException { - return queryVocabulary( - vocabularyId, VocabularyParam.builder().model(this.model).build()); + return queryVocabulary(vocabularyId, VocabularyParam.builder().model(this.model).build()); } /** @@ -124,8 +119,7 @@ public Vocabulary queryVocabulary(String vocabularyId) */ public void updateVocabulary(String vocabularyId, JsonArray vocabulary) throws NoApiKeyException, InputRequiredException { - updateVocabulary( - vocabularyId, vocabulary, VocabularyParam.builder().model(this.model).build()); + updateVocabulary(vocabularyId, vocabulary, VocabularyParam.builder().model(this.model).build()); } /** diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java index c317732..be127da 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConfig.java @@ -19,7 +19,7 @@ public class OmniRealtimeConfig { /** omni output modalities to be used in session */ @NonNull List modalities; - /** voice to be used in session ,not need in qwen-asr-realtime*/ + /** voice to be used in session ,not need in qwen-asr-realtime */ @Builder.Default String voice = null; /** input audio format */ @@ -51,11 +51,9 @@ public class OmniRealtimeConfig { /** The extra parameters. */ @Builder.Default Map parameters = null; /** translation configuration */ - @Builder.Default - OmniRealtimeTranslationParam translationConfig = null; + @Builder.Default OmniRealtimeTranslationParam translationConfig = null; /** transcription configuration */ - @Builder.Default - OmniRealtimeTranscriptionParam transcriptionConfig = null; + @Builder.Default OmniRealtimeTranscriptionParam transcriptionConfig = null; public JsonObject getConfig() { Map config = new HashMap<>(); @@ -93,7 +91,8 @@ public JsonObject getConfig() { Map translationConfig = new HashMap<>(); translationConfig.put(OmniRealtimeConstants.LANGUAGE, this.translationConfig.getLanguage()); if (this.translationConfig.getCorpus() != null) { - translationConfig.put(OmniRealtimeConstants.TRANSLATION_CORPUS, this.translationConfig.getCorpus()); + translationConfig.put( + OmniRealtimeConstants.TRANSLATION_CORPUS, this.translationConfig.getCorpus()); } config.put(OmniRealtimeConstants.TRANSLATION, translationConfig); } else { @@ -103,16 +102,22 @@ public JsonObject getConfig() { if (transcriptionConfig != null) { Map transcriptionConfig = new HashMap<>(); if (this.transcriptionConfig.getInputSampleRate() != null) { - config.put(OmniRealtimeConstants.SAMPLE_RATE, this.transcriptionConfig.getInputSampleRate()); + config.put( + OmniRealtimeConstants.SAMPLE_RATE, this.transcriptionConfig.getInputSampleRate()); } if (this.transcriptionConfig.getInputAudioFormat() != null) { - config.put(OmniRealtimeConstants.INPUT_AUDIO_FORMAT, this.transcriptionConfig.getInputAudioFormat()); + config.put( + OmniRealtimeConstants.INPUT_AUDIO_FORMAT, + this.transcriptionConfig.getInputAudioFormat()); } if (this.transcriptionConfig.getLanguage() != null) { - transcriptionConfig.put(OmniRealtimeConstants.LANGUAGE, this.transcriptionConfig.getLanguage()); + transcriptionConfig.put( + OmniRealtimeConstants.LANGUAGE, this.transcriptionConfig.getLanguage()); } if (this.transcriptionConfig.getCorpus() != null) { - transcriptionConfig.put(OmniRealtimeConstants.INPUT_AUDIO_TRANSCRIPTION_CORPUS, this.transcriptionConfig.getCorpus()); + transcriptionConfig.put( + OmniRealtimeConstants.INPUT_AUDIO_TRANSCRIPTION_CORPUS, + this.transcriptionConfig.getCorpus()); } config.put(OmniRealtimeConstants.INPUT_AUDIO_TRANSCRIPTION, transcriptionConfig); } @@ -127,4 +132,4 @@ public JsonObject getConfig() { JsonObject jsonObject = gson.toJsonTree(config).getAsJsonObject(); return jsonObject; } -} \ No newline at end of file +} diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java index bd84219..dd5169c 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConstants.java @@ -43,4 +43,4 @@ public class OmniRealtimeConstants { public static final String PROTOCOL_RESPONSE_TYPE_SESSION_FINISHED = "session.finished"; public static final String PROTOCOL_RESPONSE_TYPE_AUDIO_DELTA = "response.audio.delta"; public static final String PROTOCOL_RESPONSE_TYPE_RESPONSE_DONE = "response.done"; -} \ No newline at end of file +} diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java index 6510caa..80178bc 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeConversation.java @@ -10,17 +10,16 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; -import lombok.extern.slf4j.Slf4j; -import okhttp3.*; -import okio.ByteString; -import org.jetbrains.annotations.NotNull; - import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; +import okhttp3.*; +import okio.ByteString; +import org.jetbrains.annotations.NotNull; /** @author lengjiayi */ @Slf4j @@ -76,12 +75,12 @@ public void connect() throws NoApiKeyException, InterruptedException { } // block wait server session done, max 20 seconds, then close connection - public void endSession() throws InterruptedException{ + public void endSession() throws InterruptedException { endSession(DEFAULT_TIMEOUT); } // block wait server session done ,then close connection - public void endSession(int timeout) throws InterruptedException{ + public void endSession(int timeout) throws InterruptedException { checkStatus(); CountDownLatch latch = new CountDownLatch(1); disconnectLatch.set(latch); @@ -107,12 +106,11 @@ public void endSessionAsync() { Map commit_request = new HashMap<>(); commit_request.put(OmniRealtimeConstants.PROTOCOL_EVENT_ID, generateEventId()); commit_request.put( - OmniRealtimeConstants.PROTOCOL_TYPE, OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_FINISH_SESSION); + OmniRealtimeConstants.PROTOCOL_TYPE, + OmniRealtimeConstants.PROTOCOL_EVENT_TYPE_FINISH_SESSION); sendMessage(createGson().toJson(commit_request), true); } - - /** * Update session configuration, should be used before create response * @@ -380,13 +378,13 @@ public void onMessage(WebSocket webSocket, String text) { + lastFirstAudioDelay + " ms"); break; - case OmniRealtimeConstants.PROTOCOL_RESPONSE_TYPE_SESSION_FINISHED: - log.info("session: " + sessionId + " finished"); - CountDownLatch latch = disconnectLatch.get(); - if (latch != null) { - latch.countDown(); - } - break; + case OmniRealtimeConstants.PROTOCOL_RESPONSE_TYPE_SESSION_FINISHED: + log.info("session: " + sessionId + " finished"); + CountDownLatch latch = disconnectLatch.get(); + if (latch != null) { + latch.countDown(); + } + break; } } } @@ -410,4 +408,4 @@ public void onClosing(@NotNull WebSocket webSocket, int code, @NotNull String re websocktetClient.close(code, reason); log.debug("WebSocket closing: " + code + ", " + reason); } -} \ No newline at end of file +} diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranscriptionParam.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranscriptionParam.java index ac9ab57..d04c943 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranscriptionParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranscriptionParam.java @@ -1,45 +1,39 @@ package com.alibaba.dashscope.audio.omni; -import lombok.Data; - import java.util.HashMap; import java.util.Map; +import lombok.Data; /** @author songsong.shao */ @Data public class OmniRealtimeTranscriptionParam { - /** input audio sample rate*/ - private Integer inputSampleRate = null; - /** input audio format */ - private String inputAudioFormat = null; - /** input audio language */ - private String language = null; - - /** corpus for qwen-asr-realtime */ - private Map corpus = null; - - /** text content for corpus */ - private String corpusText; - - /** - * Set text in corpus to improve model recognition accuracy. - */ - public void setCorpusText(String text) { - if (corpus == null) { - corpus = new HashMap<>(); - } - this.corpusText = text; - corpus.put("text", text); - } - - /** - * Default constructor - */ - public OmniRealtimeTranscriptionParam() { - } - - public OmniRealtimeTranscriptionParam(String audioFormat, int sampleRate) { - this.inputAudioFormat = audioFormat; - this.inputSampleRate = sampleRate; + /** input audio sample rate */ + private Integer inputSampleRate = null; + /** input audio format */ + private String inputAudioFormat = null; + /** input audio language */ + private String language = null; + + /** corpus for qwen-asr-realtime */ + private Map corpus = null; + + /** text content for corpus */ + private String corpusText; + + /** Set text in corpus to improve model recognition accuracy. */ + public void setCorpusText(String text) { + if (corpus == null) { + corpus = new HashMap<>(); } -} \ No newline at end of file + this.corpusText = text; + corpus.put("text", text); + } + + /** Default constructor */ + public OmniRealtimeTranscriptionParam() {} + + public OmniRealtimeTranscriptionParam(String audioFormat, int sampleRate) { + this.inputAudioFormat = audioFormat; + this.inputSampleRate = sampleRate; + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java index a51ec9f..668c583 100644 --- a/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/omni/OmniRealtimeTranslationParam.java @@ -1,24 +1,23 @@ // Copyright (c) Alibaba, Inc. and its affiliates. package com.alibaba.dashscope.audio.omni; +import java.util.Map; import lombok.Builder; import lombok.Data; -import java.util.Map; - /** @author songsong.shao */ @Builder @Data public class OmniRealtimeTranslationParam { - /** language for translation */ - private String language; - private Corpus corpus; + /** language for translation */ + private String language; - @Builder - @Data - public static class Corpus { - /** Custom phrases to improve translation accuracy */ - private Map phrases; // translation phrases, - } + private Corpus corpus; -} \ No newline at end of file + @Builder + @Data + public static class Corpus { + /** Custom phrases to improve translation accuracy */ + private Map phrases; // translation phrases, + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionParam.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionParam.java index 16cf3fb..6ddb29a 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionParam.java @@ -4,22 +4,16 @@ import com.alibaba.dashscope.base.HalfDuplexServiceParam; import com.alibaba.dashscope.exception.InputRequiredException; -import com.alibaba.dashscope.utils.ApiKeywords; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonArray; -import com.google.gson.JsonElement; import com.google.gson.JsonObject; -import lombok.Builder; +import java.nio.ByteBuffer; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NonNull; import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; -import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.List; - @EqualsAndHashCode(callSuper = true) @Data @SuperBuilder diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionQueryParam.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionQueryParam.java index 59ea6fe..97b1693 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionQueryParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionQueryParam.java @@ -1,10 +1,9 @@ package com.alibaba.dashscope.audio.qwen_asr; +import java.util.Map; import lombok.Data; import lombok.experimental.SuperBuilder; -import java.util.Map; - @Data @SuperBuilder public class QwenTranscriptionQueryParam { @@ -19,7 +18,7 @@ public Map getCustomHeaders() { } public static QwenTranscriptionQueryParam FromTranscriptionParam( - QwenTranscriptionParam param, String taskId) { + QwenTranscriptionParam param, String taskId) { return QwenTranscriptionQueryParam.builder() .apiKey(param.getApiKey()) .taskId(taskId) diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionResult.java b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionResult.java index f0162e5..b698f6c 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionResult.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_asr/QwenTranscriptionResult.java @@ -6,16 +6,12 @@ import com.alibaba.dashscope.common.TaskStatus; import com.alibaba.dashscope.exception.ApiException; import com.alibaba.dashscope.utils.ApiKeywords; -import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.annotations.SerializedName; import lombok.Data; import lombok.EqualsAndHashCode; -import java.util.ArrayList; -import java.util.List; - @Data @EqualsAndHashCode() public class QwenTranscriptionResult { diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtime.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtime.java index 3771a9a..a4dc9ba 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtime.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtime.java @@ -184,6 +184,7 @@ public void sendRaw(String rawData) { public String getSessionId() { return sessionId; } + public String getResponseId() { return lastResponseId; } diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java index 5da409f..4ab517b 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java @@ -77,7 +77,7 @@ public JsonObject getConfig() { config.put(QwenTtsRealtimeConstants.BIT_RATE, this.bitRate); } if (languageType != null) { - config.put(QwenTtsRealtimeConstants.LANGUAGE_TYPE,languageType); + config.put(QwenTtsRealtimeConstants.LANGUAGE_TYPE, languageType); } if (enableTn != null) { diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeParam.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeParam.java index 3e0e857..bd0412b 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeParam.java @@ -18,7 +18,7 @@ public class QwenTtsRealtimeParam { public static String baseWebsocketApiUrl = System.getenv() .getOrDefault( - DASHSCOPE_WEBSOCKET_QWEN_TTS_REALTIME_BASE_URL_ENV, + DASHSCOPE_WEBSOCKET_QWEN_TTS_REALTIME_BASE_URL_ENV, String.format("wss://dashscope.aliyuncs.com/api-ws/v1/realtime")); /** The model to use. */ diff --git a/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisResult.java b/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisResult.java index 36f7c17..4671fce 100644 --- a/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisResult.java +++ b/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisResult.java @@ -42,8 +42,8 @@ public static SpeechSynthesisResult fromDashScopeResult(DashScopeResult dashScop SpeechSynthesisResult result = new SpeechSynthesisResult(); if (dashScopeResult.getOutput() instanceof ByteBuffer) { result.audioFrame = cloneBuffer((ByteBuffer) dashScopeResult.getOutput()); - }else if (dashScopeResult.getOutput() instanceof JsonObject) { - result.output =(JsonObject) dashScopeResult.getOutput(); + } else if (dashScopeResult.getOutput() instanceof JsonObject) { + result.output = (JsonObject) dashScopeResult.getOutput(); } try { if (dashScopeResult.getRequestId() != null) { diff --git a/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesizer.java b/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesizer.java index ee3514f..686d078 100644 --- a/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesizer.java +++ b/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesizer.java @@ -11,9 +11,6 @@ import com.alibaba.dashscope.protocol.Protocol; import com.alibaba.dashscope.protocol.StreamingMode; import io.reactivex.Flowable; -import lombok.Getter; -import lombok.extern.slf4j.Slf4j; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; @@ -21,6 +18,8 @@ import java.nio.channels.WritableByteChannel; import java.util.*; import java.util.concurrent.atomic.AtomicReference; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; @Slf4j public final class SpeechSynthesizer { diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisAudioFormat.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisAudioFormat.java index c57dc57..218103b 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisAudioFormat.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisAudioFormat.java @@ -32,7 +32,6 @@ public enum SpeechSynthesisAudioFormat { OGG_OPUS_48KHZ_MONO_32KBPS("opus", 48000, "mono", 32), OGG_OPUS_48KHZ_MONO_64KBPS("opus", 48000, "mono", 64); - private final String format; private final int sampleRate; private final String channels; diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java index 0fb5e20..f1c535d 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java @@ -4,15 +4,14 @@ import com.alibaba.dashscope.audio.tts.SpeechSynthesisTextType; import com.alibaba.dashscope.base.FullDuplexServiceParam; import io.reactivex.Flowable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.experimental.SuperBuilder; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - /** @author lengjiayi */ @EqualsAndHashCode(callSuper = true) @Data @@ -46,9 +45,9 @@ public class SpeechSynthesisParam extends FullDuplexServiceParam { @Builder.Default private long firstPackageTimeout = -1; /** - * the following parameters take effect - * only in CosyVoice V3 and later versions. - * instruction for synthesis. */ + * the following parameters take effect only in CosyVoice V3 and later versions. instruction for + * synthesis. + */ @Builder.Default private String instruction = null; /** random seed. */ @Builder.Default private int seed = 0; @@ -57,7 +56,6 @@ public class SpeechSynthesisParam extends FullDuplexServiceParam { /** synthesis style */ @Builder.Default private int style = 0; - @Override public Map getParameters() { Map params = new HashMap<>(); diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesizer.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesizer.java index 38230d0..8b382ac 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesizer.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesizer.java @@ -9,7 +9,6 @@ import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.protocol.*; -import com.alibaba.dashscope.threads.runs.Run; import com.google.gson.JsonObject; import io.reactivex.BackpressureStrategy; import io.reactivex.Emitter; @@ -673,8 +672,7 @@ public void streamingCall(String text) { * @return If a callback is not set during initialization, the complete audio is returned as the * function's return value. Otherwise, the return value is null. */ - public ByteBuffer call(String text, long timeoutMillis) - throws RuntimeException { + public ByteBuffer call(String text, long timeoutMillis) throws RuntimeException { if (this.callback == null) { this.callback = new ResultCallback() { diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java index 7d6b01c..a69ab70 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java @@ -4,12 +4,10 @@ import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.utils.ApiKeywords; import com.alibaba.dashscope.utils.JsonUtils; -import com.google.gson.JsonArray; import com.google.gson.JsonObject; import java.nio.ByteBuffer; import java.security.InvalidParameterException; import java.util.List; - import lombok.Data; import lombok.EqualsAndHashCode; import lombok.experimental.SuperBuilder; diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java index 18b41ff..df3246c 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java @@ -74,8 +74,7 @@ public Voice createVoice(String targetModel, String prefix, String url) * @throws InputRequiredException 如果必须参数为空 */ public Voice[] listVoice(String prefix) throws NoApiKeyException, InputRequiredException { - return listVoice( - prefix, 0, 10, VoiceEnrollmentParam.builder().model(this.model).build()); + return listVoice(prefix, 0, 10, VoiceEnrollmentParam.builder().model(this.model).build()); } /** @@ -91,10 +90,7 @@ public Voice[] listVoice(String prefix) throws NoApiKeyException, InputRequiredE public Voice[] listVoice(String prefix, int pageIndex, int pageSize) throws NoApiKeyException, InputRequiredException { return listVoice( - prefix, - pageIndex, - pageSize, - VoiceEnrollmentParam.builder().model(this.model).build()); + prefix, pageIndex, pageSize, VoiceEnrollmentParam.builder().model(this.model).build()); } /** @@ -106,8 +102,7 @@ public Voice[] listVoice(String prefix, int pageIndex, int pageSize) * @throws InputRequiredException 如果必须参数为空 */ public Voice queryVoice(String voiceId) throws NoApiKeyException, InputRequiredException { - return queryVoice( - voiceId, VoiceEnrollmentParam.builder().model(this.model).build()); + return queryVoice(voiceId, VoiceEnrollmentParam.builder().model(this.model).build()); } /** @@ -120,8 +115,7 @@ public Voice queryVoice(String voiceId) throws NoApiKeyException, InputRequiredE */ public void updateVoice(String voiceId, String url) throws NoApiKeyException, InputRequiredException { - updateVoice( - voiceId, url, VoiceEnrollmentParam.builder().model(this.model).build()); + updateVoice(voiceId, url, VoiceEnrollmentParam.builder().model(this.model).build()); } /** diff --git a/src/main/java/com/alibaba/dashscope/common/MessageAdapter.java b/src/main/java/com/alibaba/dashscope/common/MessageAdapter.java index 8c9905f..37dd01c 100644 --- a/src/main/java/com/alibaba/dashscope/common/MessageAdapter.java +++ b/src/main/java/com/alibaba/dashscope/common/MessageAdapter.java @@ -145,18 +145,17 @@ private MessageContentText parseTextContent(LinkedTreeMap conten // Parse cache_control if present if (contentItem.containsKey(ApiKeywords.CONTENT_TYPE_CACHE_CONTROL)) { LinkedTreeMap cacheControlMap = - (LinkedTreeMap) contentItem.get( - ApiKeywords.CONTENT_TYPE_CACHE_CONTROL); + (LinkedTreeMap) contentItem.get(ApiKeywords.CONTENT_TYPE_CACHE_CONTROL); MessageContentText.CacheControl.CacheControlBuilder cacheBuilder = - MessageContentText.CacheControl.builder() - .type((String) cacheControlMap.get("type")); + MessageContentText.CacheControl.builder().type((String) cacheControlMap.get("type")); // Handle ttl field - convert to String regardless of input type if (cacheControlMap.containsKey("ttl")) { Object ttlObj = cacheControlMap.get("ttl"); - cacheBuilder.ttl(ttlObj instanceof Number - ? String.valueOf(((Number) ttlObj).intValue()) - : String.valueOf(ttlObj)); + cacheBuilder.ttl( + ttlObj instanceof Number + ? String.valueOf(((Number) ttlObj).intValue()) + : String.valueOf(ttlObj)); } textBuilder.cacheControl(cacheBuilder.build()); } @@ -165,13 +164,10 @@ private MessageContentText parseTextContent(LinkedTreeMap conten // Parse image_url content @SuppressWarnings({"unchecked", "rawtypes"}) - private MessageContentImageURL parseImageContent( - LinkedTreeMap contentItem) { + private MessageContentImageURL parseImageContent(LinkedTreeMap contentItem) { LinkedTreeMap imageUrlMap = - (LinkedTreeMap) contentItem.get( - ApiKeywords.CONTENT_TYPE_IMAGE_URL); - ImageURL.ImageURLBuilder imageBuilder = - ImageURL.builder().url((String) imageUrlMap.get("url")); + (LinkedTreeMap) contentItem.get(ApiKeywords.CONTENT_TYPE_IMAGE_URL); + ImageURL.ImageURLBuilder imageBuilder = ImageURL.builder().url((String) imageUrlMap.get("url")); if (imageUrlMap.containsKey("detail")) { imageBuilder.detail((String) imageUrlMap.get("detail")); } diff --git a/src/main/java/com/alibaba/dashscope/common/MessageContentText.java b/src/main/java/com/alibaba/dashscope/common/MessageContentText.java index e2aa206..047d252 100644 --- a/src/main/java/com/alibaba/dashscope/common/MessageContentText.java +++ b/src/main/java/com/alibaba/dashscope/common/MessageContentText.java @@ -13,8 +13,8 @@ public class MessageContentText extends MessageContentBase { @Data @SuperBuilder public static class CacheControl { - private String type; - private String ttl; + private String type; + private String ttl; } @Builder.Default private String type = "text"; diff --git a/src/main/java/com/alibaba/dashscope/common/MultiModalMessage.java b/src/main/java/com/alibaba/dashscope/common/MultiModalMessage.java index aa9041e..32d8cc5 100644 --- a/src/main/java/com/alibaba/dashscope/common/MultiModalMessage.java +++ b/src/main/java/com/alibaba/dashscope/common/MultiModalMessage.java @@ -1,11 +1,10 @@ // Copyright (c) Alibaba, Inc. and its affiliates. package com.alibaba.dashscope.common; -import java.util.List; -import java.util.Map; - import com.alibaba.dashscope.tools.ToolCallBase; import com.google.gson.annotations.SerializedName; +import java.util.List; +import java.util.Map; import lombok.Data; import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; diff --git a/src/main/java/com/alibaba/dashscope/common/MultiModalMessageAdapter.java b/src/main/java/com/alibaba/dashscope/common/MultiModalMessageAdapter.java index d9cbedf..9327ea6 100644 --- a/src/main/java/com/alibaba/dashscope/common/MultiModalMessageAdapter.java +++ b/src/main/java/com/alibaba/dashscope/common/MultiModalMessageAdapter.java @@ -63,6 +63,7 @@ private void writeValue(JsonWriter out, Object value) throws IOException { out.value(value.toString()); } } + private void writeToolCallBase(JsonWriter writer, ToolCallBase toolCallBase) throws IOException { writer.beginObject(); @@ -103,14 +104,11 @@ private void writeToolCallBase(JsonWriter writer, ToolCallBase toolCallBase) thr // Convert LinkedTreeMap to ToolCallFunction @SuppressWarnings("unchecked") - private ToolCallFunction convertToCallFunction( - LinkedTreeMap toolCall) { + private ToolCallFunction convertToCallFunction(LinkedTreeMap toolCall) { ToolCallFunction functionCall = new ToolCallFunction(); if (toolCall.containsKey("function")) { - ToolCallFunction.CallFunction callFunction = - functionCall.new CallFunction(); - LinkedTreeMap fc = - (LinkedTreeMap) toolCall.get("function"); + ToolCallFunction.CallFunction callFunction = functionCall.new CallFunction(); + LinkedTreeMap fc = (LinkedTreeMap) toolCall.get("function"); if (fc.containsKey("name")) { callFunction.setName(fc.get("name").toString()); } @@ -135,7 +133,6 @@ private ToolCallFunction convertToCallFunction( return functionCall; } - @Override public void write(JsonWriter out, MultiModalMessage value) throws IOException { out.beginObject(); @@ -167,7 +164,8 @@ public void write(JsonWriter out, MultiModalMessage value) throws IOException { out.name(ApiKeywords.TOOL_CALLS); out.beginArray(); List toolCalls = value.getToolCalls(); - for (ToolCallBase tc : JsonUtils.fromJson(JsonUtils.toJson(toolCalls), ToolCallBase[].class)) { + for (ToolCallBase tc : + JsonUtils.fromJson(JsonUtils.toJson(toolCalls), ToolCallBase[].class)) { writeToolCallBase(out, tc); } out.endArray(); @@ -224,8 +222,7 @@ public MultiModalMessage read(JsonReader in) throws IOException { List toolCallsList = (List) toolCallsObj; // Check if need conversion for function type boolean needConversion = false; - if (!toolCallsList.isEmpty() && - toolCallsList.get(0) instanceof LinkedTreeMap) { + if (!toolCallsList.isEmpty() && toolCallsList.get(0) instanceof LinkedTreeMap) { LinkedTreeMap firstToolCall = (LinkedTreeMap) toolCallsList.get(0); if (firstToolCall.containsKey("type")) { diff --git a/src/main/java/com/alibaba/dashscope/common/Task.java b/src/main/java/com/alibaba/dashscope/common/Task.java index e09bd63..1f59843 100644 --- a/src/main/java/com/alibaba/dashscope/common/Task.java +++ b/src/main/java/com/alibaba/dashscope/common/Task.java @@ -23,5 +23,4 @@ public enum Task { Task(String value) { this.value = value; } - } diff --git a/src/main/java/com/alibaba/dashscope/common/TaskGroup.java b/src/main/java/com/alibaba/dashscope/common/TaskGroup.java index 7ffa369..541075c 100644 --- a/src/main/java/com/alibaba/dashscope/common/TaskGroup.java +++ b/src/main/java/com/alibaba/dashscope/common/TaskGroup.java @@ -17,5 +17,4 @@ public enum TaskGroup { TaskGroup(String value) { this.value = value; } - } diff --git a/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingParam.java b/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingParam.java index e562997..5653ef7 100644 --- a/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingParam.java +++ b/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingParam.java @@ -37,8 +37,7 @@ public String getValue() { public enum OutputType { DENSE("dense"), SPARSE("sparse"), - DENSE_AND_SPARSE("dense&sparse") - ; + DENSE_AND_SPARSE("dense&sparse"); private final String value; @@ -54,25 +53,26 @@ public String getValue() { @Singular private List texts; /** - * After the text is converted into a vector, it can be applied to downstream tasks such as retrieval, - * clustering, and classification. For asymmetric tasks such as retrieval, in order to achieve better - * retrieval results, it is recommended to distinguish between query text (query) and bottom database - * text (document) types, clustering Symmetric tasks such as , classification, etc. do not need to be - * specially specified, and the system default value "document" can be used - * */ + * After the text is converted into a vector, it can be applied to downstream tasks such as + * retrieval, clustering, and classification. For asymmetric tasks such as retrieval, in order to + * achieve better retrieval results, it is recommended to distinguish between query text (query) + * and bottom database text (document) types, clustering Symmetric tasks such as , classification, + * etc. do not need to be specially specified, and the system default value "document" can be used + */ private TextType textType; /** - * For specifying the output vector dimensions, which is applicable only to the text-embedding-v3 model and above - * versions. The specified value can only be selected from the six values: 1024, 768, 512, 256, 128, or 64, - * with 1024 as the default value. + * For specifying the output vector dimensions, which is applicable only to the text-embedding-v3 + * model and above versions. The specified value can only be selected from the six values: 1024, + * 768, 512, 256, 128, or 64, with 1024 as the default value. */ private Integer dimension; /** - * The user-specified output for discrete vector representation is only applicable to models of version - * text_embedding_v3 or above. The value can be chosen from dense, sparse, or dense&sparse, - * with dense as the default selection, resulting in the output of continuous vectors only. + * The user-specified output for discrete vector representation is only applicable to models of + * version text_embedding_v3 or above. The value can be chosen from dense, sparse, or + * dense&sparse, with dense as the default selection, resulting in the output of continuous + * vectors only. */ private OutputType outputType; diff --git a/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingSparseEmbedding.java b/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingSparseEmbedding.java index ba74326..1e094a4 100644 --- a/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingSparseEmbedding.java +++ b/src/main/java/com/alibaba/dashscope/embeddings/TextEmbeddingSparseEmbedding.java @@ -2,7 +2,6 @@ import lombok.Data; - @Data public class TextEmbeddingSparseEmbedding { private Integer index; diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java index f383907..64c463b 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java @@ -10,18 +10,11 @@ import com.alibaba.dashscope.protocol.ConnectionOptions; import com.alibaba.dashscope.protocol.Protocol; import com.alibaba.dashscope.protocol.StreamingMode; -import com.alibaba.dashscope.utils.Constants; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; import io.reactivex.BackpressureStrategy; import io.reactivex.Emitter; import io.reactivex.Flowable; -import lombok.Builder; -import lombok.Getter; -import lombok.NonNull; -import lombok.experimental.SuperBuilder; -import lombok.extern.slf4j.Slf4j; - import java.nio.ByteBuffer; import java.util.LinkedList; import java.util.Objects; @@ -29,13 +22,16 @@ import java.util.UUID; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicReference; +import lombok.Builder; +import lombok.Getter; +import lombok.NonNull; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; /** - * Multimodal Dialog class responsible for handling various operations in multimodal - * conversations. + * Multimodal Dialog class responsible for handling various operations in multimodal conversations. * - * author songsong.shao - * date 2025/4/24 + *

author songsong.shao date 2025/4/24 */ @Slf4j public class MultiModalDialog { @@ -55,8 +51,7 @@ public class MultiModalDialog { private MultiModalRequestParamWithStream requestParamWithStream; // Request parameter with stream - private State.DialogState currentState = - State.DialogState.IDLE; // Current dialogue state + private State.DialogState currentState = State.DialogState.IDLE; // Current dialogue state private String currentDialogId = ""; // Current dialogue ID @@ -67,8 +62,7 @@ private static class AsyncCmdBuffer { // Asynchronous command buffer class private String directive; // Directive type } - private final Queue DialogBuffer = - new LinkedList<>(); // Dialogue buffer queue + private final Queue DialogBuffer = new LinkedList<>(); // Dialogue buffer queue private AtomicReference stopLatch = new AtomicReference<>(null); // Stop signal latch @@ -99,9 +93,9 @@ public Flowable getStreamingData() { } public static MultiModalRequestParamWithStream FromMultiModalParam( - MultiModalRequestParam param, Flowable dataStream, String preRequestId) { + MultiModalRequestParam param, Flowable dataStream, String preRequestId) { ClientInfo clientInfo = param.getClientInfo(); - clientInfo.setSdk("dashscope-sdk-java "+ Version.version); + clientInfo.setSdk("dashscope-sdk-java " + Version.version); return MultiModalRequestParamWithStream.builder() .parameter("pre_task_id", preRequestId) .headers(param.getHeaders()) @@ -123,11 +117,9 @@ public static MultiModalRequestParamWithStream FromMultiModalParam( /** * Constructor initializes service options and creates a duplex communication API instance. * - * param: param Request parameter - * param: callback Callback interface + *

param: param Request parameter param: callback Callback interface */ - public MultiModalDialog( - MultiModalRequestParam param, MultiModalDialogCallback callback) { + public MultiModalDialog(MultiModalRequestParam param, MultiModalDialogCallback callback) { this.serviceOption = ApiServiceOption.builder() .protocol(Protocol.WEBSOCKET) @@ -143,43 +135,42 @@ public MultiModalDialog( this.callback = callback; connectionOptions = ConnectionOptions.builder().build(); this.connectionOptions.setUseDefaultClient(false); - this.duplexApi = new SynchronizeFullDuplexApi<>(this.connectionOptions,serviceOption); + this.duplexApi = new SynchronizeFullDuplexApi<>(this.connectionOptions, serviceOption); } - /** * Constructor initializes service options and creates a duplex communication API instance. * - * param: param Request parameter - * param: callback Callback interface - * param: connectionOptions Connection options + *

param: param Request parameter param: callback Callback interface param: connectionOptions + * Connection options */ public MultiModalDialog( - MultiModalRequestParam param, MultiModalDialogCallback callback, ConnectionOptions connectionOptions) { + MultiModalRequestParam param, + MultiModalDialogCallback callback, + ConnectionOptions connectionOptions) { this.serviceOption = - ApiServiceOption.builder() - .protocol(Protocol.WEBSOCKET) - .streamingMode(StreamingMode.DUPLEX) - .outputMode(OutputMode.ACCUMULATE) - .taskGroup(TaskGroup.AIGC.getValue()) - .task(Task.MULTIMODAL_GENERATION.getValue()) - .function(Function.GENERATION.getValue()) - .passTaskStarted(true) - .build(); + ApiServiceOption.builder() + .protocol(Protocol.WEBSOCKET) + .streamingMode(StreamingMode.DUPLEX) + .outputMode(OutputMode.ACCUMULATE) + .taskGroup(TaskGroup.AIGC.getValue()) + .task(Task.MULTIMODAL_GENERATION.getValue()) + .function(Function.GENERATION.getValue()) + .passTaskStarted(true) + .build(); this.connectionOptions = connectionOptions; this.connectionOptions.setUseDefaultClient(false); this.requestParam = param; this.callback = callback; - this.duplexApi = new SynchronizeFullDuplexApi<>(this.connectionOptions,serviceOption); + this.duplexApi = new SynchronizeFullDuplexApi<>(this.connectionOptions, serviceOption); } /** * Constructor allows custom service options. * - * param: param Request parameter - * param: callback Callback interface - * param: serviceOption Custom service options + *

param: param Request parameter param: callback Callback interface param: serviceOption + * Custom service options */ public MultiModalDialog( MultiModalRequestParam param, @@ -196,11 +187,10 @@ public void start() { Flowable dataFrames = Flowable.create( emitter -> { // Creates data flow - synchronized ( - MultiModalDialog.this) { // Synchronized block ensures thread safety + synchronized (MultiModalDialog.this) { // Synchronized block ensures thread safety if (!DialogBuffer.isEmpty()) { // If dialogue buffer queue is not empty for (AsyncCmdBuffer buffer : - DialogBuffer) { // Iterates through each buffer in the queue + DialogBuffer) { // Iterates through each buffer in the queue if (buffer.isStop) { // If buffer marks stop, ends data flow emitter.onComplete(); return; @@ -221,12 +211,11 @@ public void start() { stopLatch = new AtomicReference<>(new CountDownLatch(1)); // Initializes stop signal latch - String preTaskId = requestParam.getTaskId() != null ? requestParam.getTaskId() : UUID.randomUUID().toString(); + String preTaskId = + requestParam.getTaskId() != null ? requestParam.getTaskId() : UUID.randomUUID().toString(); requestParamWithStream = MultiModalRequestParamWithStream.FromMultiModalParam( - this.requestParam, - dataFrames, - preTaskId); // Creates request parameter with stream + this.requestParam, dataFrames, preTaskId); // Creates request parameter with stream try { this.duplexApi.duplexCall( @@ -263,7 +252,8 @@ public void onEvent(DashScopeResult message) { sendFinishTaskMessage(); break; case "Error": - String error_code = output.has("error_code") ? output.get("error_code").getAsString() : ""; + String error_code = + output.has("error_code") ? output.get("error_code").getAsString() : ""; callback.onError( dialogId, error_code, @@ -301,7 +291,7 @@ public void onEvent(DashScopeResult message) { callback.onRespondingStarted(dialogId); // Response start event break; case "RespondingEnded": - callback.onRespondingEnded(dialogId,output); // Response end event + callback.onRespondingEnded(dialogId, output); // Response end event break; case "SpeechContent": callback.onSpeechContent(dialogId, output); // Speech content event @@ -312,12 +302,12 @@ public void onEvent(DashScopeResult message) { default: break; } - }else if (message.getEvent() != null) { - if (message.getEvent().equals("task-started")){ + } else if (message.getEvent() != null) { + if (message.getEvent().equals("task-started")) { callback.onConnected(); log.debug( - "MultiModalDialog connected, state is {}", - currentState.getValue()); // Logs connection status + "MultiModalDialog connected, state is {}", + currentState.getValue()); // Logs connection status } } } @@ -336,13 +326,13 @@ public void onError(Exception e) { // Error event handling ApiException apiException = (ApiException) e; // Casts exception to API exception if (apiException.getStatus().isJson()) { callback.onError( - apiException.getStatus().getRequestId(), - apiException.getStatus().getCode(), - apiException.getStatus().getMessage()); - }else { + apiException.getStatus().getRequestId(), + apiException.getStatus().getCode(), + apiException.getStatus().getMessage()); + } else { callback.onError(currentDialogId, "", apiException.getMessage()); } - }else { + } else { callback.onError(currentDialogId, "", e.getMessage()); } if (stopLatch.get() != null) { @@ -384,22 +374,20 @@ public void localRespondingEnded() { sendTextFrame("LocalRespondingEnded"); } -// /** Requests to speak. */ -// public void requestToSpeak() { -// sendTextFrame("RequestToSpeak"); -// } + // /** Requests to speak. */ + // public void requestToSpeak() { + // sendTextFrame("RequestToSpeak"); + // } - /** send heart beat request ,will respond heart beat*/ - public void sendHeartBeat(){ + /** send heart beat request ,will respond heart beat */ + public void sendHeartBeat() { sendTextFrame("HeartBeat"); } /** * Requests response. * - * param: type Response type - * param: text Response text - * param: updateParams Update parameters + *

param: type Response type param: text Response text param: updateParams Update parameters */ public void requestToRespond( String type, String text, MultiModalRequestParam.UpdateParams updateParams) { @@ -424,15 +412,15 @@ public void requestToRespond( /** * Updates information. * - * param: updateParams Update parameters + *

param: updateParams Update parameters */ public void updateInfo(MultiModalRequestParam.UpdateParams updateParams) { requestParamWithStream.clearParameters(); MultiModalRequestParam.CustomInput customInput = - MultiModalRequestParam.CustomInput.builder() - .directive("UpdateInfo") - .dialogId(currentDialogId) - .build(); + MultiModalRequestParam.CustomInput.builder() + .directive("UpdateInfo") + .dialogId(currentDialogId) + .build(); requestParamWithStream.setCustomInput(customInput); if (updateParams != null && updateParams.clientInfo != null) { requestParamWithStream.setClientInfo(updateParams.clientInfo); @@ -466,21 +454,21 @@ public void stop() { /** * Gets current dialogue state. * - * return: Current dialogue state + *

return: Current dialogue state */ public State.DialogState getDialogState() { return currentState; } -// /** Gets dialogue mode. */ -// public void getDialogMode() { -// // -// } + // /** Gets dialogue mode. */ + // public void getDialogMode() { + // // + // } /** * Sends audio frame. * - * param: audioFrame Audio frame data + *

param: audioFrame Audio frame data */ public void sendAudioData(ByteBuffer audioFrame) { if (audioFrame == null) { @@ -504,7 +492,7 @@ public void sendAudioData(ByteBuffer audioFrame) { /** * Sends text frame. * - * param: textFrame Text frame data + *

param: textFrame Text frame data */ private void sendTextFrame( String textFrame) { // Instruction type FullDuplex.getWebSocketPayload(data) diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java index 462e5cd..8b1cadc 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java @@ -1,9 +1,6 @@ package com.alibaba.dashscope.multimodal; -/** - * author songsong.shao - * date 2025/4/25 - */ +/** author songsong.shao date 2025/4/25 */ public class MultiModalDialogApiKeyWords { public static String CONST_AUDIO_FORMAT_PCM = "pcm"; diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogCallback.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogCallback.java index 274b2fb..269e756 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogCallback.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogCallback.java @@ -1,98 +1,87 @@ package com.alibaba.dashscope.multimodal; import com.google.gson.JsonObject; - import java.nio.ByteBuffer; /** * Abstract class representing callbacks for multi-modal conversation events. * - * author songsong.shao - * date 2025/4/27 + *

author songsong.shao date 2025/4/27 */ public abstract class MultiModalDialogCallback { /** Called when the conversation is connected. */ public abstract void onConnected(); - /** - * Called when a conversation starts with a specific dialog ID. - */ + /** Called when a conversation starts with a specific dialog ID. */ public abstract void onStarted(String dialogId); - /** - * Called when a conversation stops with a specific dialog ID. - */ + /** Called when a conversation stops with a specific dialog ID. */ public abstract void onStopped(String dialogId); - /** - * Called when speech starts in a specific dialog. - */ + /** Called when speech starts in a specific dialog. */ public abstract void onSpeechStarted(String dialogId); - /** - * Called when speech ends in a specific dialog. - */ + /** Called when speech ends in a specific dialog. */ public abstract void onSpeechEnded(String dialogId); /** * Called when an error occurs during a conversation. * - * param dialogId The unique identifier for the dialog. - * param errorCode The error code associated with the error. - * param errorMsg The error message associated with the error. + *

param dialogId The unique identifier for the dialog. param errorCode The error code + * associated with the error. param errorMsg The error message associated with the error. */ public abstract void onError(String dialogId, String errorCode, String errorMsg); /** * Called when the conversation state changes. * - * param state The new state of the conversation. + *

param state The new state of the conversation. */ public abstract void onStateChanged(State.DialogState state); /** * Called when speech audio data is available. * - * param audioData The audio data as a ByteBuffer. + *

param audioData The audio data as a ByteBuffer. */ public abstract void onSpeechAudioData(ByteBuffer audioData); /** * Called when responding starts in a specific dialog. * - * param dialogId The unique identifier for the dialog. + *

param dialogId The unique identifier for the dialog. */ public abstract void onRespondingStarted(String dialogId); /** * Called when responding ends in a specific dialog. * - * param dialogId The unique identifier for the dialog. - * param content The content of the response as a JsonObject. + *

param dialogId The unique identifier for the dialog. param content The content of the + * response as a JsonObject. */ public abstract void onRespondingEnded(String dialogId, JsonObject content); /** * Called when responding content is available in a specific dialog. * - * param dialogId The unique identifier for the dialog. - * param content The content of the response as a JsonObject. + *

param dialogId The unique identifier for the dialog. param content The content of the + * response as a JsonObject. */ public abstract void onRespondingContent(String dialogId, JsonObject content); /** * Called when speech content is available in a specific dialog. * - * param dialogId The unique identifier for the dialog. - * param content The content of the speech as a JsonObject. + *

param dialogId The unique identifier for the dialog. param content The content of the speech + * as a JsonObject. */ public abstract void onSpeechContent(String dialogId, JsonObject content); /** * Called when a request is accepted in a specific dialog. * - * param dialogId The unique identifier for the dialog. + *

param dialogId The unique identifier for the dialog. */ public abstract void onRequestAccepted(String dialogId); diff --git a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java index 9f6e908..fa75ebf 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java @@ -1,22 +1,17 @@ package com.alibaba.dashscope.multimodal; -/** - * author songsong.shao - * date 2025/4/24 - */ +/** author songsong.shao date 2025/4/24 */ +import static com.alibaba.dashscope.multimodal.MultiModalDialogApiKeyWords.*; import com.alibaba.dashscope.base.FullDuplexServiceParam; import io.reactivex.Flowable; -import lombok.*; -import lombok.experimental.SuperBuilder; -import org.jetbrains.annotations.NotNull; - import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; - -import static com.alibaba.dashscope.multimodal.MultiModalDialogApiKeyWords.*; +import lombok.*; +import lombok.experimental.SuperBuilder; +import org.jetbrains.annotations.NotNull; @SuperBuilder @EqualsAndHashCode(callSuper = true) @@ -61,7 +56,7 @@ public static class UpStream { private AsrPostProcessing asrPostProcessing; @Builder.Default private Integer sampleRate = 16000; private String vocabularyId = null; - @Builder.Default private String audioFormat = CONST_AUDIO_FORMAT_PCM; //support pcm/opus + @Builder.Default private String audioFormat = CONST_AUDIO_FORMAT_PCM; // support pcm/opus private Map passThroughParams; @Builder @@ -86,16 +81,16 @@ public static class DownStream { @Builder.Default private String intermediateText = "transcript"; @Builder.Default private boolean debug = false; @Builder.Default private String type = "Audio"; - @Builder.Default private int volume = 50; //0~100 - @Builder.Default private int pitchRate = 100; //50~200 - @Builder.Default private int speechRate = 100; //50~200 - @Builder.Default private String audioFormat = "pcm"; //support pcm/mp3 + @Builder.Default private int volume = 50; // 0~100 + @Builder.Default private int pitchRate = 100; // 50~200 + @Builder.Default private int speechRate = 100; // 50~200 + @Builder.Default private String audioFormat = "pcm"; // support pcm/mp3 private Map passThroughParams; } @Builder public static class DialogAttributes { -// private String prompt; + // private String prompt; } @Builder @@ -166,7 +161,7 @@ public Map getParameters() { upStreamParams.put(CONST_NAME_UP_STREAM_TYPE, upStream.type); upStreamParams.put(CONST_NAME_UP_STREAM_MODE, upStream.mode); upStreamParams.put(CONST_NAME_UP_STREAM_AUDIO_FORMAT, upStream.audioFormat); - if (upStream.asrPostProcessing != null){ + if (upStream.asrPostProcessing != null) { final var asrPostProcessingParams = getUpstreamAsrPostProcessing(); if (!asrPostProcessingParams.isEmpty()) { upStreamParams.put(CONST_NAME_ASR_POST_PROCESSING, asrPostProcessingParams); @@ -217,19 +212,20 @@ public Map getParameters() { locationParams.put(CONST_NAME_CLIENT_INFO_LOCATION_CITY_NAME, clientInfo.location.cityName); locationParams.put(CONST_NAME_CLIENT_INFO_LOCATION_LATITUDE, clientInfo.location.latitude); locationParams.put( - CONST_NAME_CLIENT_INFO_LOCATION_LONGITUDE, clientInfo.location.longitude); + CONST_NAME_CLIENT_INFO_LOCATION_LONGITUDE, clientInfo.location.longitude); clientInfoParams.put(CONST_NAME_CLIENT_INFO_LOCATION, locationParams); } if (clientInfo.status != null) { clientInfoParams.put(CONST_NAME_CLIENT_INFO_STATUS, clientInfo.status); } - if (clientInfo.activeForegroundApp != null){ - clientInfoParams.put(CONST_NAME_CLIENT_INFO_ACTIVE_FOREGROUND_APP, clientInfo.activeForegroundApp); + if (clientInfo.activeForegroundApp != null) { + clientInfoParams.put( + CONST_NAME_CLIENT_INFO_ACTIVE_FOREGROUND_APP, clientInfo.activeForegroundApp); } if (clientInfo.passThroughParams != null) { clientInfoParams.putAll(clientInfo.passThroughParams); } - if (clientInfo.sdk != null){ + if (clientInfo.sdk != null) { clientInfoParams.put(CONST_NAME_CLIENT_INFO_SDK, clientInfo.sdk); } params.put(CONST_NAME_CLIENT_INFO, clientInfoParams); @@ -264,7 +260,7 @@ public Map getParameters() { if (images != null) { params.put(CONST_NAME_IMAGES, images); } - if (this.parameters != null){ + if (this.parameters != null) { params.putAll(this.parameters); } return params; @@ -275,7 +271,7 @@ public Map getParameters() { if (upStream.asrPostProcessing.replaceWords != null) { val replaceWords = new ArrayList>(); for (val replaceWord : upStream.asrPostProcessing.replaceWords) { - val replaceWordObj= new HashMap(); + val replaceWordObj = new HashMap(); replaceWordObj.put(CONST_NAME_REPLACE_WORD_SOURCE, replaceWord.source); replaceWordObj.put(CONST_NAME_REPLACE_WORD_TARGET, replaceWord.target); replaceWordObj.put(CONST_NAME_REPLACE_WORD_MATCH_MODE, replaceWord.matchMode); diff --git a/src/main/java/com/alibaba/dashscope/multimodal/State.java b/src/main/java/com/alibaba/dashscope/multimodal/State.java index 197711d..b701cfc 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/State.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/State.java @@ -2,10 +2,7 @@ import lombok.Getter; -/** - * author songsong.shao - * date 2025/4/27 - */ +/** author songsong.shao date 2025/4/27 */ @Getter public class State { @Getter @@ -25,7 +22,7 @@ public enum DialogState { /** * 状态机类,用于管理机器人的状态转换。 -- GETTER -- 获取当前状态。 * - * return 当前状态。 + *

return 当前状态。 */ private DialogState currentState; diff --git a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWu.java b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWu.java index 6d7d5c5..fc865c0 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWu.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWu.java @@ -8,64 +8,59 @@ import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.protocol.*; -/** - * The tingwu client. - */ +/** The tingwu client. */ public final class TingWu { - private final SynchronizeHalfDuplexApi syncApi; - private final ApiServiceOption serviceOption; - private final String DEFAULT_BASE_HTTP_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"; + private final SynchronizeHalfDuplexApi syncApi; + private final ApiServiceOption serviceOption; + private final String DEFAULT_BASE_HTTP_URL = + "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"; - private ApiServiceOption defaultApiServiceOption() { - return ApiServiceOption.builder() - .protocol(Protocol.HTTP) - .httpMethod(HttpMethod.POST) - .isService(false) - .baseHttpUrl(DEFAULT_BASE_HTTP_URL) - .build(); - } - - public TingWu() { - serviceOption = defaultApiServiceOption(); - syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); - } + private ApiServiceOption defaultApiServiceOption() { + return ApiServiceOption.builder() + .protocol(Protocol.HTTP) + .httpMethod(HttpMethod.POST) + .isService(false) + .baseHttpUrl(DEFAULT_BASE_HTTP_URL) + .build(); + } - public TingWu(String protocol) { - serviceOption = defaultApiServiceOption(); - syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); - } + public TingWu() { + serviceOption = defaultApiServiceOption(); + syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); + } - public TingWu(String protocol, String baseUrl) { - serviceOption = defaultApiServiceOption(); - serviceOption.setProtocol(Protocol.of(protocol)); - if (protocol.equals(Protocol.HTTP.getValue())) { - serviceOption.setBaseHttpUrl(baseUrl); - } else { - serviceOption.setBaseWebSocketUrl(baseUrl); - } - syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); - } + public TingWu(String protocol) { + serviceOption = defaultApiServiceOption(); + syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); + } - public TingWu(String protocol, String baseUrl, ConnectionOptions connectionOptions) { - serviceOption = defaultApiServiceOption(); - serviceOption.setProtocol(Protocol.of(protocol)); - if (protocol.equals(Protocol.HTTP.getValue())) { - serviceOption.setBaseHttpUrl(baseUrl); - } else { - serviceOption.setBaseWebSocketUrl(baseUrl); - } - syncApi = new SynchronizeHalfDuplexApi<>(connectionOptions, serviceOption); + public TingWu(String protocol, String baseUrl) { + serviceOption = defaultApiServiceOption(); + serviceOption.setProtocol(Protocol.of(protocol)); + if (protocol.equals(Protocol.HTTP.getValue())) { + serviceOption.setBaseHttpUrl(baseUrl); + } else { + serviceOption.setBaseWebSocketUrl(baseUrl); } + syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); + } - - /** - * Call the server to get the whole result, only http protocol - */ - public DashScopeResult call(HalfDuplexServiceParam param) - throws ApiException, NoApiKeyException, InputRequiredException { - param.validate(); - serviceOption.setIsSSE(false); - return syncApi.call(param); + public TingWu(String protocol, String baseUrl, ConnectionOptions connectionOptions) { + serviceOption = defaultApiServiceOption(); + serviceOption.setProtocol(Protocol.of(protocol)); + if (protocol.equals(Protocol.HTTP.getValue())) { + serviceOption.setBaseHttpUrl(baseUrl); + } else { + serviceOption.setBaseWebSocketUrl(baseUrl); } + syncApi = new SynchronizeHalfDuplexApi<>(connectionOptions, serviceOption); + } + /** Call the server to get the whole result, only http protocol */ + public DashScopeResult call(HalfDuplexServiceParam param) + throws ApiException, NoApiKeyException, InputRequiredException { + param.validate(); + serviceOption.setIsSSE(false); + return syncApi.call(param); + } } diff --git a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuParam.java b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuParam.java index 39ac68e..546c898 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuParam.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuParam.java @@ -5,14 +5,13 @@ import com.alibaba.dashscope.utils.ApiKeywords; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; +import java.nio.ByteBuffer; +import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.Setter; import lombok.experimental.SuperBuilder; -import java.nio.ByteBuffer; -import java.util.Map; - @Setter @SuperBuilder @EqualsAndHashCode(callSuper = true) @@ -20,14 +19,13 @@ public class TingWuParam extends HalfDuplexServiceParam { private Map input; + @Override public Map getInput() { return input; } - /** - * Get the websocket binary data, only for websocket binary input data. - **/ + /** Get the websocket binary data, only for websocket binary input data. */ @Override public ByteBuffer getBinaryData() { return null; diff --git a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtime.java b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtime.java index 81b283b..552edc2 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtime.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtime.java @@ -14,17 +14,16 @@ import io.reactivex.BackpressureStrategy; import io.reactivex.Emitter; import io.reactivex.Flowable; -import lombok.Builder; -import lombok.Getter; -import lombok.NonNull; -import lombok.experimental.SuperBuilder; -import lombok.extern.slf4j.Slf4j; - import java.nio.ByteBuffer; import java.util.LinkedList; import java.util.Queue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicReference; +import lombok.Builder; +import lombok.Getter; +import lombok.NonNull; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; @Slf4j public final class TingWuRealtime { @@ -64,9 +63,9 @@ public Flowable getStreamingData() { public static TingWuRealtimeWithStream FromTingWuRealtimeParam( TingWuRealtimeParam param, Flowable audioStream, String preRequestId) { TingWuRealtimeWithStream tingWuRealtimeWithStream = - TingWuRealtimeWithStream.builder() + TingWuRealtimeWithStream.builder() .parameters((param.getParameters())) -// .parameter("pre_task_id", preRequestId) + // .parameter("pre_task_id", preRequestId) .headers(param.getHeaders()) .appId(param.getAppId()) .format(param.getFormat()) @@ -143,8 +142,7 @@ public TingWuRealtime(String baseUrl, ConnectionOptions connectionOptions) { * source language, target languages, etc. * @param callback ResultCallback */ - public void call( - TingWuRealtimeParam param, TingWuRealtimeCallback callback) { + public void call(TingWuRealtimeParam param, TingWuRealtimeCallback callback) { this.reset(); if (param == null) { throw new ApiException( @@ -180,41 +178,44 @@ public void call( } stopLatch = new AtomicReference<>(new CountDownLatch(1)); -// preRequestId = UUID.randomUUID().toString(); + // preRequestId = UUID.randomUUID().toString(); try { duplexApi.duplexCall( - TingWuRealtimeWithStream.FromTingWuRealtimeParam( - param, audioFrames, preRequestId), + TingWuRealtimeWithStream.FromTingWuRealtimeParam(param, audioFrames, preRequestId), new ResultCallback() { @Override public void onEvent(DashScopeResult message) { - log.debug("Response Result :" + message); + log.debug("Response Result :" + message); TingWuRealtimeResult tingWuRealtimeResult = - TingWuRealtimeResult.fromDashScopeResult(message); + TingWuRealtimeResult.fromDashScopeResult(message); - switch (tingWuRealtimeResult.getAction()){ + switch (tingWuRealtimeResult.getAction()) { case "speech-listen": // 建联后收到的第一个服务端返回 callback.onStarted(tingWuRealtimeResult.getTaskId()); synchronized (TingWuRealtime.this) { - isListenState = true; + isListenState = true; } - callback.onSpeechListen(tingWuRealtimeResult.getTaskId(),tingWuRealtimeResult.getOutput().get("dataId").getAsString()); + callback.onSpeechListen( + tingWuRealtimeResult.getTaskId(), + tingWuRealtimeResult.getOutput().get("dataId").getAsString()); break; case "task-failed": - callback.onError(tingWuRealtimeResult.getOutput().get("errorCode").getAsString(), - tingWuRealtimeResult.getOutput().get("errorMessage").getAsString()); + callback.onError( + tingWuRealtimeResult.getOutput().get("errorCode").getAsString(), + tingWuRealtimeResult.getOutput().get("errorMessage").getAsString()); break; case "recognize-result": - callback.onRecognizeResult(tingWuRealtimeResult.getTaskId(), tingWuRealtimeResult.getOutput()); + callback.onRecognizeResult( + tingWuRealtimeResult.getTaskId(), tingWuRealtimeResult.getOutput()); break; case "ai-result": - callback.onAiResult(tingWuRealtimeResult.getTaskId(), tingWuRealtimeResult.getOutput()); + callback.onAiResult( + tingWuRealtimeResult.getTaskId(), tingWuRealtimeResult.getOutput()); break; case "speech-end": callback.onStopped(tingWuRealtimeResult.getTaskId()); } - } @Override @@ -231,7 +232,7 @@ public void onComplete() { public void onError(Exception e) { ApiException apiException = new ApiException(e); apiException.setStackTrace(e.getStackTrace()); - callback.onError(apiException.getStatus().getCode(),apiException.getMessage()); + callback.onError(apiException.getStatus().getCode(), apiException.getMessage()); if (stopLatch.get() != null) { stopLatch.get().countDown(); } @@ -240,7 +241,7 @@ public void onError(Exception e) { } catch (NoApiKeyException e) { ApiException apiException = new ApiException(e); apiException.setStackTrace(e.getStackTrace()); - callback.onError(apiException.getStatus().getCode(),apiException.getMessage()); + callback.onError(apiException.getStatus().getCode(), apiException.getMessage()); if (stopLatch.get() != null) { stopLatch.get().countDown(); } @@ -263,7 +264,7 @@ public void sendAudioFrame(ByteBuffer audioFrame) { } log.debug("send audio frame: " + audioFrame.remaining()); synchronized (this) { - if (audioEmitter == null || !isListenState ) { + if (audioEmitter == null || !isListenState) { cmdBuffer.add(AsyncCmdBuffer.builder().audioFrame(audioFrame).build()); } else { audioEmitter.onNext(audioFrame); diff --git a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeCallback.java b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeCallback.java index 8a6566f..a7344b0 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeCallback.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeCallback.java @@ -5,50 +5,44 @@ /** * Abstract class representing callbacks for multi-modal conversation events. * - * author songsong.shao - * date 2025/4/27 + *

author songsong.shao date 2025/4/27 */ public abstract class TingWuRealtimeCallback { - /** - * Called when a conversation starts with a specific dialog ID. - */ + /** Called when a conversation starts with a specific dialog ID. */ public abstract void onStarted(String taskId); - /** - * Called when a conversation stops with a specific dialog ID. - */ + /** Called when a conversation stops with a specific dialog ID. */ public abstract void onStopped(String taskId); /** * Called when an error occurs during a conversation. * - * param errorCode The error code associated with the error. - * param errorMsg The error message associated with the error. + *

param errorCode The error code associated with the error. param errorMsg The error message + * associated with the error. */ public abstract void onError(String errorCode, String errorMsg); /** * Called when responding content is available in a specific dialog. * - * param taskId The unique identifier for the dialog. - * param content The content of the response as a JsonObject. + *

param taskId The unique identifier for the dialog. param content The content of the response + * as a JsonObject. */ public abstract void onAiResult(String taskId, JsonObject content); /** * Called when speech content is available in a specific dialog. * - * param taskId The unique identifier for the dialog. - * param content The content of the speech as a JsonObject. + *

param taskId The unique identifier for the dialog. param content The content of the speech + * as a JsonObject. */ public abstract void onRecognizeResult(String taskId, JsonObject content); /** * Called when a request is accepted in a specific dialog. * - * param taskId The unique identifier for the dialog. - * param dataId for this task + *

param taskId The unique identifier for the dialog. param dataId for this task */ public abstract void onSpeechListen(String taskId, String dataId); diff --git a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeParam.java b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeParam.java index 0d0d866..ce8791f 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeParam.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeParam.java @@ -1,19 +1,18 @@ package com.alibaba.dashscope.multimodal.tingwu; +import static com.alibaba.dashscope.multimodal.MultiModalDialogApiKeyWords.CONST_NAME_DIRECTIVE; + import com.alibaba.dashscope.base.FullDuplexServiceParam; import io.reactivex.Flowable; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Setter; import lombok.experimental.SuperBuilder; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -import static com.alibaba.dashscope.multimodal.MultiModalDialogApiKeyWords.CONST_NAME_DIRECTIVE; - @Getter @Setter @SuperBuilder @@ -29,10 +28,9 @@ public class TingWuRealtimeParam extends FullDuplexServiceParam { private Map input; public void clearParameters() { - input.clear(); + input.clear(); } - @Override public Map getHeaders() { return Collections.emptyMap(); @@ -40,34 +38,34 @@ public Map getHeaders() { @Override public Map getParameters() { - Map params = new HashMap<>(); - params.put("sampleRate", sampleRate); - params.put("format", format); - params.put("terminology", terminology); - if (maxEndSilence != null) { - params.put("maxEndSilence", maxEndSilence); - } - if (parameters != null) { - params.putAll(parameters); - } - return params; + Map params = new HashMap<>(); + params.put("sampleRate", sampleRate); + params.put("format", format); + params.put("terminology", terminology); + if (maxEndSilence != null) { + params.put("maxEndSilence", maxEndSilence); + } + if (parameters != null) { + params.putAll(parameters); + } + return params; } @Override public Map getInputs() { - if (input == null) { - input = new HashMap<>(); - } - input.put("appId", appId); + if (input == null) { + input = new HashMap<>(); + } + input.put("appId", appId); - return input; + return input; } public void setDirective(String directive) { - if (input == null) { - input = new HashMap<>(); - } - input.put(CONST_NAME_DIRECTIVE, directive); + if (input == null) { + input = new HashMap<>(); + } + input.put(CONST_NAME_DIRECTIVE, directive); } @Override diff --git a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeResult.java b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeResult.java index 8b62224..1d7b9cd 100644 --- a/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeResult.java +++ b/src/main/java/com/alibaba/dashscope/multimodal/tingwu/TingWuRealtimeResult.java @@ -22,7 +22,6 @@ public class TingWuRealtimeResult { private JsonObject usage; - public static TingWuRealtimeResult fromDashScopeResult(DashScopeResult dashScopeResult) throws ApiException { TingWuRealtimeResult result = new TingWuRealtimeResult(); diff --git a/src/main/java/com/alibaba/dashscope/protocol/ClientProviders.java b/src/main/java/com/alibaba/dashscope/protocol/ClientProviders.java index 32d5336..f159df7 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/ClientProviders.java +++ b/src/main/java/com/alibaba/dashscope/protocol/ClientProviders.java @@ -57,11 +57,11 @@ public static FullDuplexClient getFullDuplexClient( } else { if (connectionOptions.isUseDefaultClient()) { return new OkHttpWebSocketClient( - OkHttpClientFactory.getNewOkHttpClient(connectionOptions), passTaskStarted); - }else { + OkHttpClientFactory.getNewOkHttpClient(connectionOptions), passTaskStarted); + } else { // create custom client for audio models return new OkHttpWebSocketClientForAudio( - OkHttpClientFactory.getNewOkHttpClient(connectionOptions), passTaskStarted); + OkHttpClientFactory.getNewOkHttpClient(connectionOptions), passTaskStarted); } } } diff --git a/src/main/java/com/alibaba/dashscope/protocol/ConnectionOptions.java b/src/main/java/com/alibaba/dashscope/protocol/ConnectionOptions.java index 06476ea..d11e071 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/ConnectionOptions.java +++ b/src/main/java/com/alibaba/dashscope/protocol/ConnectionOptions.java @@ -93,5 +93,4 @@ public boolean isUseDefaultClient() { public void setUseDefaultClient(boolean useDefaultClient) { this.useDefaultClient = useDefaultClient; } - } diff --git a/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java b/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java index 73c60ae..dc363af 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java +++ b/src/main/java/com/alibaba/dashscope/protocol/DashScopeHeaders.java @@ -31,8 +31,7 @@ public static String userAgent(String customUserAgent) { public static Map buildWebSocketHeaders( String apiKey, boolean isSecurityCheck, String workspace, Map customHeaders) throws NoApiKeyException { - return buildWebSocketHeaders(apiKey, isSecurityCheck, workspace, - customHeaders, null); + return buildWebSocketHeaders(apiKey, isSecurityCheck, workspace, customHeaders, null); } // Build WebSocket headers with optional custom user agent suffix @@ -67,8 +66,8 @@ public static Map buildHttpHeaders( String workspace, Map customHeaders) throws NoApiKeyException { - return buildHttpHeaders(apiKey, isSecurityCheck, protocol, isSSE, - isAsyncTask, workspace, customHeaders, null); + return buildHttpHeaders( + apiKey, isSecurityCheck, protocol, isSSE, isAsyncTask, workspace, customHeaders, null); } // Build HTTP headers with optional custom user agent suffix diff --git a/src/main/java/com/alibaba/dashscope/protocol/FullDuplexRequest.java b/src/main/java/com/alibaba/dashscope/protocol/FullDuplexRequest.java index 523acbe..c01dce0 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/FullDuplexRequest.java +++ b/src/main/java/com/alibaba/dashscope/protocol/FullDuplexRequest.java @@ -173,7 +173,7 @@ public JsonObject getFinishedTaskMessage(String taskId) { JsonObject payload = new JsonObject(); JsonObject input = new JsonObject(); if (serviceOption.getTask().equals("multimodal-generation")) { - input.addProperty("directive","Stop"); + input.addProperty("directive", "Stop"); } payload.add("input", input); wsMessage.add(ApiKeywords.PAYLOAD, payload); diff --git a/src/main/java/com/alibaba/dashscope/protocol/HalfDuplexRequest.java b/src/main/java/com/alibaba/dashscope/protocol/HalfDuplexRequest.java index 8c125fd..df8e88e 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/HalfDuplexRequest.java +++ b/src/main/java/com/alibaba/dashscope/protocol/HalfDuplexRequest.java @@ -91,8 +91,8 @@ public HttpRequest getHttpRequest() throws NoApiKeyException, ApiException { // Extract and filter custom user agent from param headers Map paramHeaders = param.getHeaders(); String customUserAgent = paramHeaders != null ? paramHeaders.get("user-agent") : null; - Map filteredHeaders = paramHeaders != null ? - new java.util.HashMap<>(paramHeaders) : new java.util.HashMap<>(); + Map filteredHeaders = + paramHeaders != null ? new java.util.HashMap<>(paramHeaders) : new java.util.HashMap<>(); filteredHeaders.remove("user-agent"); Map requestHeaders = diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpClientFactory.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpClientFactory.java index f90076c..8d6d165 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpClientFactory.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpClientFactory.java @@ -75,11 +75,12 @@ public static OkHttpClient getOkHttpClient() { } public static OkHttpClient getNewOkHttpClient(ConnectionOptions connectionOptions) { - Builder builder = Holder.INSTANCE - .newBuilder() - .connectTimeout(connectionOptions.getConnectTimeout()) - .readTimeout(connectionOptions.getReadTimeout()) - .writeTimeout(connectionOptions.getWriteTimeout()); + Builder builder = + Holder.INSTANCE + .newBuilder() + .connectTimeout(connectionOptions.getConnectTimeout()) + .readTimeout(connectionOptions.getReadTimeout()) + .writeTimeout(connectionOptions.getWriteTimeout()); // Configure proxy if available if (connectionOptions.getProxy() != null) { diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java index cf3f703..bb13dae 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClient.java @@ -63,10 +63,9 @@ private Request buildConnectionRequest( String baseWebSocketUrl) throws NoApiKeyException { // Extract and filter custom user agent from param headers - String customUserAgent = customHeaders != null ? - customHeaders.get("user-agent") : null; - Map filteredHeaders = customHeaders != null ? - new java.util.HashMap<>(customHeaders) : new java.util.HashMap<>(); + String customUserAgent = customHeaders != null ? customHeaders.get("user-agent") : null; + Map filteredHeaders = + customHeaders != null ? new java.util.HashMap<>(customHeaders) : new java.util.HashMap<>(); filteredHeaders.remove("user-agent"); // build the request builder. diff --git a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java index b7b38f1..6b13d30 100644 --- a/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java +++ b/src/main/java/com/alibaba/dashscope/protocol/okhttp/OkHttpWebSocketClientForAudio.java @@ -5,160 +5,170 @@ import com.google.gson.JsonObject; import io.reactivex.Flowable; import io.reactivex.functions.Action; -import lombok.extern.slf4j.Slf4j; -import okhttp3.OkHttpClient; -import okio.ByteString; - import java.nio.ByteBuffer; import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import lombok.extern.slf4j.Slf4j; +import okhttp3.OkHttpClient; +import okio.ByteString; -/** - * @author songsong.shao - */ +/** @author songsong.shao */ @Slf4j public class OkHttpWebSocketClientForAudio extends OkHttpWebSocketClient { - private static final AtomicInteger STREAMING_REQUEST_THREAD_NUM = new AtomicInteger(0); - private static final AtomicBoolean SHUTDOWN_INITIATED = new AtomicBoolean(false); + private static final AtomicInteger STREAMING_REQUEST_THREAD_NUM = new AtomicInteger(0); + private static final AtomicBoolean SHUTDOWN_INITIATED = new AtomicBoolean(false); - private static final ExecutorService STREAMING_REQUEST_EXECUTOR = - new ThreadPoolExecutor(1, 100, 60L, TimeUnit.SECONDS, new SynchronousQueue<>(), r -> { - Thread t = new Thread(r, "WS-STREAMING-REQ-Worker-" + STREAMING_REQUEST_THREAD_NUM.updateAndGet(n -> n == Integer.MAX_VALUE ? 0 : n + 1)); - t.setDaemon(true); - return t; - }); + private static final ExecutorService STREAMING_REQUEST_EXECUTOR = + new ThreadPoolExecutor( + 1, + 100, + 60L, + TimeUnit.SECONDS, + new SynchronousQueue<>(), + r -> { + Thread t = + new Thread( + r, + "WS-STREAMING-REQ-Worker-" + + STREAMING_REQUEST_THREAD_NUM.updateAndGet( + n -> n == Integer.MAX_VALUE ? 0 : n + 1)); + t.setDaemon(true); + return t; + }); - public OkHttpWebSocketClientForAudio(OkHttpClient client, boolean passTaskStarted) { - super(client, passTaskStarted); - log.info("Use OkHttpWebSocketClientForAudio"); - } + public OkHttpWebSocketClientForAudio(OkHttpClient client, boolean passTaskStarted) { + super(client, passTaskStarted); + log.info("Use OkHttpWebSocketClientForAudio"); + } - @Override - protected CompletableFuture sendStreamRequest(FullDuplexRequest req) { - CompletableFuture future = - CompletableFuture.runAsync( - () -> { - try { - isFirstMessage.set(false); + @Override + protected CompletableFuture sendStreamRequest(FullDuplexRequest req) { + CompletableFuture future = + CompletableFuture.runAsync( + () -> { + try { + isFirstMessage.set(false); - JsonObject startMessage = req.getStartTaskMessage(); - log.info("send run-task request {}", JsonUtils.toJson(startMessage)); - String taskId = - startMessage.get("header").getAsJsonObject().get("task_id").getAsString(); - // send start message out. - sendTextWithRetry( - req.getApiKey(), - req.isSecurityCheck(), - JsonUtils.toJson(startMessage), - req.getWorkspace(), - req.getHeaders(), - req.getBaseWebSocketUrl()); + JsonObject startMessage = req.getStartTaskMessage(); + log.info("send run-task request {}", JsonUtils.toJson(startMessage)); + String taskId = + startMessage.get("header").getAsJsonObject().get("task_id").getAsString(); + // send start message out. + sendTextWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + JsonUtils.toJson(startMessage), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); - Flowable streamingData = req.getStreamingData(); - streamingData.subscribe( - data -> { - try { - if (data instanceof String) { - JsonObject continueData = req.getContinueMessage((String) data, taskId); - sendTextWithRetry( - req.getApiKey(), - req.isSecurityCheck(), - JsonUtils.toJson(continueData), - req.getWorkspace(), - req.getHeaders(), - req.getBaseWebSocketUrl()); - } else if (data instanceof byte[]) { - sendBinaryWithRetry( - req.getApiKey(), - req.isSecurityCheck(), - ByteString.of((byte[]) data), - req.getWorkspace(), - req.getHeaders(), - req.getBaseWebSocketUrl()); - } else if (data instanceof ByteBuffer) { - sendBinaryWithRetry( - req.getApiKey(), - req.isSecurityCheck(), - ByteString.of((ByteBuffer) data), - req.getWorkspace(), - req.getHeaders(), - req.getBaseWebSocketUrl()); - } else { - JsonObject continueData = req.getContinueMessage(data, taskId); - sendTextWithRetry( - req.getApiKey(), - req.isSecurityCheck(), - JsonUtils.toJson(continueData), - req.getWorkspace(), - req.getHeaders(), - req.getBaseWebSocketUrl()); - } - } catch (Throwable ex) { - log.error(String.format("sendStreamData exception: %s", ex.getMessage())); - responseEmitter.onError(ex); - } - }, - err -> { - log.error(String.format("Get stream data error!")); - responseEmitter.onError(err); - }, - new Action() { - @Override - public void run() throws Exception { - log.debug(String.format("Stream data send completed!")); - sendTextWithRetry( - req.getApiKey(), - req.isSecurityCheck(), - JsonUtils.toJson(req.getFinishedTaskMessage(taskId)), - req.getWorkspace(), - req.getHeaders(), - req.getBaseWebSocketUrl()); - } - }); - } catch (Throwable ex) { - log.error(String.format("sendStreamData exception: %s", ex.getMessage())); - responseEmitter.onError(ex); - } - },STREAMING_REQUEST_EXECUTOR); - return future; - } + Flowable streamingData = req.getStreamingData(); + streamingData.subscribe( + data -> { + try { + if (data instanceof String) { + JsonObject continueData = req.getContinueMessage((String) data, taskId); + sendTextWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + JsonUtils.toJson(continueData), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } else if (data instanceof byte[]) { + sendBinaryWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + ByteString.of((byte[]) data), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } else if (data instanceof ByteBuffer) { + sendBinaryWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + ByteString.of((ByteBuffer) data), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } else { + JsonObject continueData = req.getContinueMessage(data, taskId); + sendTextWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + JsonUtils.toJson(continueData), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } + } catch (Throwable ex) { + log.error(String.format("sendStreamData exception: %s", ex.getMessage())); + responseEmitter.onError(ex); + } + }, + err -> { + log.error(String.format("Get stream data error!")); + responseEmitter.onError(err); + }, + new Action() { + @Override + public void run() throws Exception { + log.debug(String.format("Stream data send completed!")); + sendTextWithRetry( + req.getApiKey(), + req.isSecurityCheck(), + JsonUtils.toJson(req.getFinishedTaskMessage(taskId)), + req.getWorkspace(), + req.getHeaders(), + req.getBaseWebSocketUrl()); + } + }); + } catch (Throwable ex) { + log.error(String.format("sendStreamData exception: %s", ex.getMessage())); + responseEmitter.onError(ex); + } + }, + STREAMING_REQUEST_EXECUTOR); + return future; + } - static {//auto close when jvm shutdown - Runtime.getRuntime().addShutdownHook(new Thread(OkHttpWebSocketClientForAudio::shutdownStreamingExecutor)); + static { // auto close when jvm shutdown + Runtime.getRuntime() + .addShutdownHook(new Thread(OkHttpWebSocketClientForAudio::shutdownStreamingExecutor)); + } + /** + * Shutdown the streaming request executor gracefully. This method should be called when the + * application is shutting down to ensure proper resource cleanup. + */ + private static void shutdownStreamingExecutor() { + if (!SHUTDOWN_INITIATED.compareAndSet(false, true)) { + log.debug("Shutdown already in progress"); + return; } - /** - * Shutdown the streaming request executor gracefully. - * This method should be called when the application is shutting down - * to ensure proper resource cleanup. - */ - private static void shutdownStreamingExecutor() { - if (!SHUTDOWN_INITIATED.compareAndSet(false, true)) { - log.debug("Shutdown already in progress"); - return; - } - if (!STREAMING_REQUEST_EXECUTOR.isShutdown()) { - log.debug("Shutting down streaming request executor..."); - STREAMING_REQUEST_EXECUTOR.shutdown(); - try { - // Wait up to 60 seconds for existing tasks to terminate - if (!STREAMING_REQUEST_EXECUTOR.awaitTermination(60, TimeUnit.SECONDS)) { - log.warn("Streaming request executor did not terminate in 60 seconds, forcing shutdown..."); - STREAMING_REQUEST_EXECUTOR.shutdownNow(); - // Wait up to 60 seconds for tasks to respond to being cancelled - if (!STREAMING_REQUEST_EXECUTOR.awaitTermination(60, TimeUnit.SECONDS)) { - log.error("Streaming request executor did not terminate"); - } - } - } catch (InterruptedException ie) { - // (Re-)Cancel if current thread also interrupted - STREAMING_REQUEST_EXECUTOR.shutdownNow(); - // Preserve interrupt status - Thread.currentThread().interrupt(); - } - log.info("Streaming request executor shut down completed"); + if (!STREAMING_REQUEST_EXECUTOR.isShutdown()) { + log.debug("Shutting down streaming request executor..."); + STREAMING_REQUEST_EXECUTOR.shutdown(); + try { + // Wait up to 60 seconds for existing tasks to terminate + if (!STREAMING_REQUEST_EXECUTOR.awaitTermination(60, TimeUnit.SECONDS)) { + log.warn( + "Streaming request executor did not terminate in 60 seconds, forcing shutdown..."); + STREAMING_REQUEST_EXECUTOR.shutdownNow(); + // Wait up to 60 seconds for tasks to respond to being cancelled + if (!STREAMING_REQUEST_EXECUTOR.awaitTermination(60, TimeUnit.SECONDS)) { + log.error("Streaming request executor did not terminate"); + } } + } catch (InterruptedException ie) { + // (Re-)Cancel if current thread also interrupted + STREAMING_REQUEST_EXECUTOR.shutdownNow(); + // Preserve interrupt status + Thread.currentThread().interrupt(); + } + log.info("Streaming request executor shut down completed"); } + } } diff --git a/src/main/java/com/alibaba/dashscope/rerank/TextReRank.java b/src/main/java/com/alibaba/dashscope/rerank/TextReRank.java index 61a6883..5e56a47 100644 --- a/src/main/java/com/alibaba/dashscope/rerank/TextReRank.java +++ b/src/main/java/com/alibaba/dashscope/rerank/TextReRank.java @@ -9,10 +9,9 @@ import com.alibaba.dashscope.protocol.*; import lombok.extern.slf4j.Slf4j; - @Slf4j public final class TextReRank { - + private final SynchronizeHalfDuplexApi syncApi; private final ApiServiceOption serviceOption; @@ -54,8 +53,7 @@ public TextReRank(String protocol, String baseUrl) { syncApi = new SynchronizeHalfDuplexApi<>(serviceOption); } - public TextReRank( - String protocol, String baseUrl, ConnectionOptions connectionOptions) { + public TextReRank(String protocol, String baseUrl, ConnectionOptions connectionOptions) { serviceOption = defaultApiServiceOption(); serviceOption.setProtocol(Protocol.of(protocol)); if (Protocol.HTTP.getValue().equals(protocol)) { @@ -75,7 +73,7 @@ public TextReRank( * @throws ApiException The request failed, possibly due to a network or data error. */ public TextReRankResult call(TextReRankParam param) - throws ApiException, NoApiKeyException, InputRequiredException { + throws ApiException, NoApiKeyException, InputRequiredException { param.validate(); serviceOption.setIsSSE(false); serviceOption.setStreamingMode(StreamingMode.NONE); diff --git a/src/main/java/com/alibaba/dashscope/rerank/TextReRankParam.java b/src/main/java/com/alibaba/dashscope/rerank/TextReRankParam.java index 4e8afa5..76a31bf 100644 --- a/src/main/java/com/alibaba/dashscope/rerank/TextReRankParam.java +++ b/src/main/java/com/alibaba/dashscope/rerank/TextReRankParam.java @@ -26,22 +26,16 @@ public class TextReRankParam extends HalfDuplexServiceParam { /** The list of candidate documents to be reranked. Maximum 500 documents. */ @Singular private List documents; - /** - * The number of top documents to return. - * If not specified, returns all candidate documents. - * If top_n is greater than the number of input documents, returns all documents. + /** + * The number of top documents to return. If not specified, returns all candidate documents. If + * top_n is greater than the number of input documents, returns all documents. */ private Integer topN; - /** - * Whether to return the original document text in the results. - * Default is false. - */ + /** Whether to return the original document text in the results. Default is false. */ private Boolean returnDocuments; - /** - * The instruction for reranking. - */ + /** The instruction for reranking. */ private String instruct; @Override @@ -67,7 +61,7 @@ public JsonObject getInput() { @Override public Map getParameters() { Map params = new HashMap<>(); - + if (topN != null) { params.put("top_n", topN); } @@ -99,4 +93,4 @@ public void validate() throws InputRequiredException { throw new InputRequiredException("Documents must not be null or empty!"); } } -} \ No newline at end of file +} diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index f318b80..50eaba7 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -170,21 +170,21 @@ public class ApiKeywords { public static final String MASK_IMAGE_URL = "mask_image_url"; - public static final String FIRST_FRAME_URL = "first_frame_url"; + public static final String FIRST_FRAME_URL = "first_frame_url"; - public static final String LAST_FRAME_URL = "last_frame_url"; + public static final String LAST_FRAME_URL = "last_frame_url"; - public static final String HEAD_FRAME = "head_frame"; + public static final String HEAD_FRAME = "head_frame"; - public static final String TAIL_FRAME = "tail_frame"; + public static final String TAIL_FRAME = "tail_frame"; - public static final String RESOLUTION = "resolution"; + public static final String RESOLUTION = "resolution"; - public static final String WITH_AUDIO = "with_audio"; + public static final String WITH_AUDIO = "with_audio"; - public static final String PROMPT_EXTEND = "prompt_extend"; + public static final String PROMPT_EXTEND = "prompt_extend"; - public static final String WATERMARK = "watermark"; + public static final String WATERMARK = "watermark"; public static final String ANNOTATIONS = "annotations"; diff --git a/src/main/java/com/alibaba/dashscope/utils/Constants.java b/src/main/java/com/alibaba/dashscope/utils/Constants.java index 014a089..e77d128 100644 --- a/src/main/java/com/alibaba/dashscope/utils/Constants.java +++ b/src/main/java/com/alibaba/dashscope/utils/Constants.java @@ -30,9 +30,9 @@ public final class Constants { public static final String DASHSCOPE_WEBSOCKET_BASE_URL_ENV = "DASHSCOPE_WEBSOCKET_BASE_URL"; public static final String DASHSCOPE_WEBSOCKET_OMNI_BASE_URL_ENV = - "DASHSCOPE_WEBSOCKET_OMNI_BASE_URL"; + "DASHSCOPE_WEBSOCKET_OMNI_BASE_URL"; public static final String DASHSCOPE_WEBSOCKET_QWEN_TTS_REALTIME_BASE_URL_ENV = - "DASHSCOPE_WEBSOCKET_QWEN_TTS_REALTIME_BASE_URL"; + "DASHSCOPE_WEBSOCKET_QWEN_TTS_REALTIME_BASE_URL"; // Setting network layer logging, support: [NONE, BASIC, HEADERS, BODY] public static final String DASHSCOPE_NETWORK_LOGGING_LEVEL_ENV = "DASHSCOPE_NETWORK_LOGGING_LEVEL"; diff --git a/src/main/java/com/alibaba/dashscope/utils/OSSUploadCertificate.java b/src/main/java/com/alibaba/dashscope/utils/OSSUploadCertificate.java index 7b15fc2..d9927b9 100644 --- a/src/main/java/com/alibaba/dashscope/utils/OSSUploadCertificate.java +++ b/src/main/java/com/alibaba/dashscope/utils/OSSUploadCertificate.java @@ -2,9 +2,7 @@ import lombok.Data; -/** - * OSS upload certificate for reuse across multiple file uploads. - */ +/** OSS upload certificate for reuse across multiple file uploads. */ @Data public class OSSUploadCertificate { private String uploadHost; @@ -26,9 +24,14 @@ public class OSSUploadCertificate { * @param xOssObjectAcl OSS object ACL * @param xOssForbidOverwrite OSS forbid overwrite flag */ - public OSSUploadCertificate(String uploadHost, String ossAccessKeyId, - String signature, String policy, String uploadDir, - String xOssObjectAcl, String xOssForbidOverwrite) { + public OSSUploadCertificate( + String uploadHost, + String ossAccessKeyId, + String signature, + String policy, + String uploadDir, + String xOssObjectAcl, + String xOssForbidOverwrite) { this.uploadHost = uploadHost; this.ossAccessKeyId = ossAccessKeyId; this.signature = signature; @@ -38,4 +41,3 @@ public OSSUploadCertificate(String uploadHost, String ossAccessKeyId, this.xOssForbidOverwrite = xOssForbidOverwrite; } } - diff --git a/src/main/java/com/alibaba/dashscope/utils/OSSUtils.java b/src/main/java/com/alibaba/dashscope/utils/OSSUtils.java index 56a245a..249aa41 100644 --- a/src/main/java/com/alibaba/dashscope/utils/OSSUtils.java +++ b/src/main/java/com/alibaba/dashscope/utils/OSSUtils.java @@ -44,8 +44,7 @@ public final class OSSUtils { */ public static String upload(String model, String filePath, String apiKey) throws NoApiKeyException { - UploadResult result = uploadWithCertificate(model, filePath, apiKey, - null); + UploadResult result = uploadWithCertificate(model, filePath, apiKey, null); return result.getOssUrl(); } @@ -59,8 +58,8 @@ public static String upload(String model, String filePath, String apiKey) * @return UploadResult containing OSS URL and certificate * @throws NoApiKeyException If API key is missing */ - public static UploadResult uploadWithCertificate(String model, - String filePath, String apiKey, OSSUploadCertificate certificate) + public static UploadResult uploadWithCertificate( + String model, String filePath, String apiKey, OSSUploadCertificate certificate) throws NoApiKeyException { OkHttpClient client = OkHttpClientFactory.getOkHttpClient(); OSSUploadCertificate cert = certificate; @@ -68,17 +67,16 @@ public static UploadResult uploadWithCertificate(String model, // Get certificate if not provided if (cert == null) { DashScopeResult uploadInfo = get_upload_certificate(model, apiKey); - JsonObject outputData = ((JsonObject) uploadInfo.getOutput()) - .getAsJsonObject("data"); - cert = new OSSUploadCertificate( - outputData.get("upload_host").getAsString(), - outputData.get("oss_access_key_id").getAsString(), - outputData.get("signature").getAsString(), - outputData.get("policy").getAsString(), - outputData.get("upload_dir").getAsString(), - outputData.get("x_oss_object_acl").getAsString(), - outputData.get("x_oss_forbid_overwrite").getAsString() - ); + JsonObject outputData = ((JsonObject) uploadInfo.getOutput()).getAsJsonObject("data"); + cert = + new OSSUploadCertificate( + outputData.get("upload_host").getAsString(), + outputData.get("oss_access_key_id").getAsString(), + outputData.get("signature").getAsString(), + outputData.get("policy").getAsString(), + outputData.get("upload_dir").getAsString(), + outputData.get("x_oss_object_acl").getAsString(), + outputData.get("x_oss_forbid_overwrite").getAsString()); } Map headers = new HashMap<>(); @@ -110,8 +108,7 @@ public static UploadResult uploadWithCertificate(String model, RequestBody.create(MediaType.parse(getContentType(filePath)), uploadFile)) .build(); - Request request = new Request.Builder().url(host).post(requestBody) - .build(); + Request request = new Request.Builder().url(host).post(requestBody).build(); try (Response response = client.newCall(request).execute()) { if (!response.isSuccessful()) { Status status = parseFailed(response); diff --git a/src/main/java/com/alibaba/dashscope/utils/ParamUtils.java b/src/main/java/com/alibaba/dashscope/utils/ParamUtils.java index 9b01298..2c55fce 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ParamUtils.java +++ b/src/main/java/com/alibaba/dashscope/utils/ParamUtils.java @@ -2,74 +2,69 @@ public class ParamUtils { - /** - * Private constructor to prevent instantiation of utility class - */ - private ParamUtils() { - // Utility class should not be instantiated - } + /** Private constructor to prevent instantiation of utility class */ + private ParamUtils() { + // Utility class should not be instantiated + } - /** - * Check if the model is qwen{n} where n is greater than or equal to 3 - * - * @param modelName the model name to check - * @return true if model is qwen{n} where n is greater than or equal to 3, - * false otherwise - */ - public static boolean isQwenVersionThreeOrHigher(String modelName) { - if (modelName == null) { - return false; - } + /** + * Check if the model is qwen{n} where n is greater than or equal to 3 + * + * @param modelName the model name to check + * @return true if model is qwen{n} where n is greater than or equal to 3, false otherwise + */ + public static boolean isQwenVersionThreeOrHigher(String modelName) { + if (modelName == null) { + return false; + } - String lowerModelName = modelName.toLowerCase(); - if (!lowerModelName.startsWith("qwen")) { - return false; - } + String lowerModelName = modelName.toLowerCase(); + if (!lowerModelName.startsWith("qwen")) { + return false; + } - String remaining = lowerModelName.substring(4); - try { - // Extract the number after "qwen" - StringBuilder numberStr = new StringBuilder(); - for (char c : remaining.toCharArray()) { - if (Character.isDigit(c)) { - numberStr.append(c); - } else { - break; - } - } - if (numberStr.length() > 0) { - int version = Integer.parseInt(numberStr.toString()); - return version >= 3; - } - } catch (NumberFormatException e) { - // If parsing fails, use default behavior + String remaining = lowerModelName.substring(4); + try { + // Extract the number after "qwen" + StringBuilder numberStr = new StringBuilder(); + for (char c : remaining.toCharArray()) { + if (Character.isDigit(c)) { + numberStr.append(c); + } else { + break; } - - return false; + } + if (numberStr.length() > 0) { + int version = Integer.parseInt(numberStr.toString()); + return version >= 3; + } + } catch (NumberFormatException e) { + // If parsing fails, use default behavior } - /** - * Check if the increment_output parameter should be modified for the given - * model - * - * @param modelName the model name to check - * @return false if model contains "tts", "omni", or "qwen-deep-research", - * true otherwise - */ - public static boolean shouldModifyIncrementalOutput(String modelName) { - if (modelName == null) { - return true; - } + return false; + } - String lowerModelName = modelName.toLowerCase(); + /** + * Check if the increment_output parameter should be modified for the given model + * + * @param modelName the model name to check + * @return false if model contains "tts", "omni", or "qwen-deep-research", true otherwise + */ + public static boolean shouldModifyIncrementalOutput(String modelName) { + if (modelName == null) { + return true; + } - // Return false if model contains any of the specified strings - if (lowerModelName.contains("tts") || - lowerModelName.contains("omni") || - lowerModelName.contains("qwen-deep-research")) { - return false; - } + String lowerModelName = modelName.toLowerCase(); - return true; + // Return false if model contains any of the specified strings + if (lowerModelName.contains("tts") + || lowerModelName.contains("omni") + || lowerModelName.contains("qwen-deep-research")) { + return false; } + + return true; + } } diff --git a/src/main/java/com/alibaba/dashscope/utils/PreprocessInputImage.java b/src/main/java/com/alibaba/dashscope/utils/PreprocessInputImage.java index 0d3f0f3..78fd782 100644 --- a/src/main/java/com/alibaba/dashscope/utils/PreprocessInputImage.java +++ b/src/main/java/com/alibaba/dashscope/utils/PreprocessInputImage.java @@ -2,7 +2,6 @@ import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.exception.UploadFileException; - import java.io.File; import java.net.URI; import java.net.URISyntaxException; @@ -10,165 +9,149 @@ public final class PreprocessInputImage { - /** - * Check and upload multiple images with certificate reuse support. - * - * @param model Model name - * @param values Map of image values - * @param apiKey API key - * @param certificate Optional upload certificate for reuse - * @return CheckAndUploadImageResult containing upload status and cert - * @throws NoApiKeyException If API key is missing - * @throws UploadFileException If upload fails - */ - public static CheckAndUploadImageResult checkAndUploadImages( - String model, Map values, String apiKey, - OSSUploadCertificate certificate) - throws NoApiKeyException, UploadFileException { - boolean isUpload = false; - OSSUploadCertificate cert = certificate; - - for (Map.Entry entry : values.entrySet()) { - String v = entry.getValue(); - if (v == null || v.isEmpty()) { - continue; - } - CheckAndUploadOneImageResult result = - checkAndUploadOneImage(model, apiKey, v, cert); - if (!result.getFileUrl().equals(v)) { - isUpload = true; - entry.setValue(result.getFileUrl()); - } - cert = result.getCertificate(); - } - return new CheckAndUploadImageResult(isUpload, cert); + /** + * Check and upload multiple images with certificate reuse support. + * + * @param model Model name + * @param values Map of image values + * @param apiKey API key + * @param certificate Optional upload certificate for reuse + * @return CheckAndUploadImageResult containing upload status and cert + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static CheckAndUploadImageResult checkAndUploadImages( + String model, Map values, String apiKey, OSSUploadCertificate certificate) + throws NoApiKeyException, UploadFileException { + boolean isUpload = false; + OSSUploadCertificate cert = certificate; + + for (Map.Entry entry : values.entrySet()) { + String v = entry.getValue(); + if (v == null || v.isEmpty()) { + continue; + } + CheckAndUploadOneImageResult result = checkAndUploadOneImage(model, apiKey, v, cert); + if (!result.getFileUrl().equals(v)) { + isUpload = true; + entry.setValue(result.getFileUrl()); + } + cert = result.getCertificate(); } - - /** - * Check and upload multiple images without certificate reuse (legacy). - * - * @param model Model name - * @param values Map of image values - * @param apiKey API key - * @return true if any file was uploaded - * @throws NoApiKeyException If API key is missing - * @throws UploadFileException If upload fails - */ - public static boolean checkAndUploadImage( - String model, Map values, String apiKey) - throws NoApiKeyException, UploadFileException { - CheckAndUploadImageResult result = checkAndUploadImages(model, - values, apiKey, null); - return result.isUpload(); + return new CheckAndUploadImageResult(isUpload, cert); + } + + /** + * Check and upload multiple images without certificate reuse (legacy). + * + * @param model Model name + * @param values Map of image values + * @param apiKey API key + * @return true if any file was uploaded + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static boolean checkAndUploadImage(String model, Map values, String apiKey) + throws NoApiKeyException, UploadFileException { + CheckAndUploadImageResult result = checkAndUploadImages(model, values, apiKey, null); + return result.isUpload(); + } + + /** + * Check and upload one image with certificate reuse support. + * + * @param model Model name + * @param apiKey API key + * @param value Image file path + * @param certificate Optional upload certificate for reuse + * @return CheckAndUploadOneImageResult containing file URL and cert + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static CheckAndUploadOneImageResult checkAndUploadOneImage( + String model, String apiKey, String value, OSSUploadCertificate certificate) + throws NoApiKeyException, UploadFileException { + String dstValue = value; + OSSUploadCertificate cert = certificate; + + if (value.startsWith("http")) { + return new CheckAndUploadOneImageResult(dstValue, cert); } - /** - * Check and upload one image with certificate reuse support. - * - * @param model Model name - * @param apiKey API key - * @param value Image file path - * @param certificate Optional upload certificate for reuse - * @return CheckAndUploadOneImageResult containing file URL and cert - * @throws NoApiKeyException If API key is missing - * @throws UploadFileException If upload fails - */ - public static CheckAndUploadOneImageResult checkAndUploadOneImage( - String model, String apiKey, String value, - OSSUploadCertificate certificate) - throws NoApiKeyException, UploadFileException { - String dstValue = value; - OSSUploadCertificate cert = certificate; - - if (value.startsWith("http")){ - return new CheckAndUploadOneImageResult(dstValue, cert); - } - - if (value.startsWith(ApiKeywords.FILE_PATH_SCHEMA)) { - try { - URI fileURI = new URI(value); - File f = new File(fileURI); - if (f.exists()) { - UploadResult result = OSSUtils.uploadWithCertificate( - model, f.getAbsolutePath(), apiKey, cert); - if (result.getOssUrl().isEmpty()) { - throw new UploadFileException(String.format( - "Uploading file: %s failed", value)); - } - dstValue = result.getOssUrl(); - cert = result.getCertificate(); - } else { - throw new UploadFileException(String.format( - "Local file: %s not exists.", value)); - } - } catch (URISyntaxException e) { - throw new UploadFileException(e.getMessage()); - } + if (value.startsWith(ApiKeywords.FILE_PATH_SCHEMA)) { + try { + URI fileURI = new URI(value); + File f = new File(fileURI); + if (f.exists()) { + UploadResult result = + OSSUtils.uploadWithCertificate(model, f.getAbsolutePath(), apiKey, cert); + if (result.getOssUrl().isEmpty()) { + throw new UploadFileException(String.format("Uploading file: %s failed", value)); + } + dstValue = result.getOssUrl(); + cert = result.getCertificate(); + } else { + throw new UploadFileException(String.format("Local file: %s not exists.", value)); } - - return new CheckAndUploadOneImageResult(dstValue, cert); + } catch (URISyntaxException e) { + throw new UploadFileException(e.getMessage()); + } } - /** - * Check and upload one image without certificate reuse (legacy). - * - * @param model Model name - * @param apiKey API key - * @param value Image file path - * @return File URL - * @throws NoApiKeyException If API key is missing - * @throws UploadFileException If upload fails - */ - public static String checkAndUploadImage( - String model, String apiKey, String value) - throws NoApiKeyException, UploadFileException { - CheckAndUploadOneImageResult result = checkAndUploadOneImage(model, - apiKey, value, null); - return result.getFileUrl(); + return new CheckAndUploadOneImageResult(dstValue, cert); + } + + /** + * Check and upload one image without certificate reuse (legacy). + * + * @param model Model name + * @param apiKey API key + * @param value Image file path + * @return File URL + * @throws NoApiKeyException If API key is missing + * @throws UploadFileException If upload fails + */ + public static String checkAndUploadImage(String model, String apiKey, String value) + throws NoApiKeyException, UploadFileException { + CheckAndUploadOneImageResult result = checkAndUploadOneImage(model, apiKey, value, null); + return result.getFileUrl(); + } + + /** Result of check and upload image operation. */ + public static class CheckAndUploadImageResult { + private boolean upload; + private OSSUploadCertificate certificate; + + public CheckAndUploadImageResult(boolean upload, OSSUploadCertificate certificate) { + this.upload = upload; + this.certificate = certificate; } - /** - * Result of check and upload image operation. - */ - public static class CheckAndUploadImageResult { - private boolean upload; - private OSSUploadCertificate certificate; - - public CheckAndUploadImageResult(boolean upload, - OSSUploadCertificate certificate) { - this.upload = upload; - this.certificate = certificate; - } - - public boolean isUpload() { - return upload; - } + public boolean isUpload() { + return upload; + } - public OSSUploadCertificate getCertificate() { - return certificate; - } + public OSSUploadCertificate getCertificate() { + return certificate; } + } - /** - * Result of check and upload one image operation. - */ - public static class CheckAndUploadOneImageResult { - private String fileUrl; - private OSSUploadCertificate certificate; - - public CheckAndUploadOneImageResult(String fileUrl, - OSSUploadCertificate certificate) { - this.fileUrl = fileUrl; - this.certificate = certificate; - } + /** Result of check and upload one image operation. */ + public static class CheckAndUploadOneImageResult { + private String fileUrl; + private OSSUploadCertificate certificate; - public String getFileUrl() { - return fileUrl; - } + public CheckAndUploadOneImageResult(String fileUrl, OSSUploadCertificate certificate) { + this.fileUrl = fileUrl; + this.certificate = certificate; + } - public OSSUploadCertificate getCertificate() { - return certificate; - } + public String getFileUrl() { + return fileUrl; } + public OSSUploadCertificate getCertificate() { + return certificate; + } + } } diff --git a/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java b/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java index 3dc187d..c69fea6 100644 --- a/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java +++ b/src/main/java/com/alibaba/dashscope/utils/PreprocessMessageInput.java @@ -5,7 +5,6 @@ import com.alibaba.dashscope.common.MultiModalMessage; import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.exception.UploadFileException; - import java.io.File; import java.net.URI; import java.net.URISyntaxException; @@ -39,7 +38,9 @@ public static boolean isValidPath(String pathString) { * @throws UploadFileException If upload fails */ public static CheckAndUploadResult checkAndUpload( - String model, MultiModalMessageItemBase message, String apiKey, + String model, + MultiModalMessageItemBase message, + String apiKey, OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { boolean isUpload = false; @@ -51,38 +52,33 @@ public static CheckAndUploadResult checkAndUpload( URI fileURI = new URI(message.getContent()); File f = new File(fileURI); if (f.exists()) { - UploadResult result = OSSUtils.uploadWithCertificate(model, - f.getAbsolutePath(), apiKey, cert); + UploadResult result = + OSSUtils.uploadWithCertificate(model, f.getAbsolutePath(), apiKey, cert); if (result.getOssUrl() == null) { throw new UploadFileException( - String.format("Uploading file: %s failed", - message.getContent())); + String.format("Uploading file: %s failed", message.getContent())); } message.setContent(result.getOssUrl()); cert = result.getCertificate(); isUpload = true; } else { throw new UploadFileException( - String.format("Local file: %s not exists.", - message.getContent())); + String.format("Local file: %s not exists.", message.getContent())); } } catch (URISyntaxException e) { throw new UploadFileException(e.getMessage()); } - } else if (!message.getModal().equals("text") - && message.getContent().startsWith("oss://")) { + } else if (!message.getModal().equals("text") && message.getContent().startsWith("oss://")) { isUpload = true; - } else if (!message.getModal().equals("text") - && !message.getContent().startsWith("http")) { + } else if (!message.getModal().equals("text") && !message.getContent().startsWith("http")) { if (isValidPath(message.getContent())) { File f = new File(message.getContent()); if (f.exists()) { - UploadResult result = OSSUtils.uploadWithCertificate(model, - f.getAbsolutePath(), apiKey, cert); + UploadResult result = + OSSUtils.uploadWithCertificate(model, f.getAbsolutePath(), apiKey, cert); if (result.getOssUrl() == null) { throw new UploadFileException( - String.format("Uploading file: %s failed", - message.getContent())); + String.format("Uploading file: %s failed", message.getContent())); } message.setContent(result.getOssUrl()); cert = result.getCertificate(); @@ -104,16 +100,14 @@ public static CheckAndUploadResult checkAndUpload( * @throws NoApiKeyException If API key is missing * @throws UploadFileException If upload fails */ - public static PreprocessResult - preProcessMessageInputs(String model, List messages, String apiKey, - OSSUploadCertificate certificate) + public static PreprocessResult preProcessMessageInputs( + String model, List messages, String apiKey, OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { boolean hasUpload = false; OSSUploadCertificate cert = certificate; for (MultiModalMessageItemBase elem : messages) { - CheckAndUploadResult result = checkAndUpload(model, elem, apiKey, - cert); + CheckAndUploadResult result = checkAndUpload(model, elem, apiKey, cert); if (result.isUpload() && !hasUpload) { hasUpload = true; } @@ -132,11 +126,9 @@ public static CheckAndUploadResult checkAndUpload( * @throws NoApiKeyException If API key is missing * @throws UploadFileException If upload fails */ - public static boolean - preProcessMessageInputs(String model, List messages, String apiKey) - throws NoApiKeyException, UploadFileException { - PreprocessResult result = preProcessMessageInputs(model, messages, - apiKey, null); + public static boolean preProcessMessageInputs( + String model, List messages, String apiKey) throws NoApiKeyException, UploadFileException { + PreprocessResult result = preProcessMessageInputs(model, messages, apiKey, null); return result.hasUpload(); } @@ -153,8 +145,7 @@ public static CheckAndUploadResult checkAndUpload( * @throws UploadFileException If upload fails */ public static CheckAndUploadOneResult checkAndUploadOneMultiModalMessage( - String model, String apiKey, String key, String value, - OSSUploadCertificate certificate) + String model, String apiKey, String key, String value, OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { String dstValue = value; OSSUploadCertificate cert = certificate; @@ -164,17 +155,15 @@ public static CheckAndUploadOneResult checkAndUploadOneMultiModalMessage( URI fileURI = new URI(value); File f = new File(fileURI); if (f.exists()) { - UploadResult result = OSSUtils.uploadWithCertificate(model, - f.getAbsolutePath(), apiKey, cert); + UploadResult result = + OSSUtils.uploadWithCertificate(model, f.getAbsolutePath(), apiKey, cert); if (result.getOssUrl() == null) { - throw new UploadFileException(String.format( - "Uploading file: %s failed", value)); + throw new UploadFileException(String.format("Uploading file: %s failed", value)); } dstValue = result.getOssUrl(); cert = result.getCertificate(); } else { - throw new UploadFileException(String.format( - "Local file: %s not exists.", value)); + throw new UploadFileException(String.format("Local file: %s not exists.", value)); } } catch (URISyntaxException e) { throw new UploadFileException(e.getMessage()); @@ -183,11 +172,10 @@ public static CheckAndUploadOneResult checkAndUploadOneMultiModalMessage( if (isValidPath(value)) { File f = new File(value); if (f.exists()) { - UploadResult result = OSSUtils.uploadWithCertificate(model, - f.getAbsolutePath(), apiKey, cert); + UploadResult result = + OSSUtils.uploadWithCertificate(model, f.getAbsolutePath(), apiKey, cert); if (result.getOssUrl() == null) { - throw new UploadFileException(String.format( - "Uploading file: %s failed", value)); + throw new UploadFileException(String.format("Uploading file: %s failed", value)); } dstValue = result.getOssUrl(); cert = result.getCertificate(); @@ -212,8 +200,8 @@ public static CheckAndUploadOneResult checkAndUploadOneMultiModalMessage( public static String checkAndUploadOneMultiModalMessage( String model, String apiKey, String key, String value) throws NoApiKeyException, UploadFileException { - CheckAndUploadOneResult result = checkAndUploadOneMultiModalMessage( - model, apiKey, key, value, null); + CheckAndUploadOneResult result = + checkAndUploadOneMultiModalMessage(model, apiKey, key, value, null); return result.getFileUrl(); } @@ -229,7 +217,9 @@ public static String checkAndUploadOneMultiModalMessage( * @throws UploadFileException If upload fails */ public static CheckAndUploadResult checkAndUploadMultiModalMessage( - String model, Map.Entry entry, String apiKey, + String model, + Map.Entry entry, + String apiKey, OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { boolean isUpload = false; @@ -242,12 +232,11 @@ public static CheckAndUploadResult checkAndUploadMultiModalMessage( for (int i = 0; i < dstValue.size(); i++) { Object v = dstValue.get(i); if (v instanceof String) { - if (!key.equals("text") && ((String)v).startsWith("oss://")) { + if (!key.equals("text") && ((String) v).startsWith("oss://")) { isUpload = true; } else { CheckAndUploadOneResult result = - checkAndUploadOneMultiModalMessage(model, apiKey, key, - (String) v, cert); + checkAndUploadOneMultiModalMessage(model, apiKey, key, (String) v, cert); if (!result.getFileUrl().equals(v)) { isUpload = true; ((List) dstValue).set(i, result.getFileUrl()); @@ -258,12 +247,11 @@ public static CheckAndUploadResult checkAndUploadMultiModalMessage( } entry.setValue(dstValue); } else if (value instanceof String) { - if (!key.equals("text") && ((String)value).startsWith("oss://")) { + if (!key.equals("text") && ((String) value).startsWith("oss://")) { isUpload = true; } else { CheckAndUploadOneResult result = - checkAndUploadOneMultiModalMessage(model, apiKey, key, - (String) value, cert); + checkAndUploadOneMultiModalMessage(model, apiKey, key, (String) value, cert); if (!result.getFileUrl().equals(value)) { isUpload = true; entry.setValue(result.getFileUrl()); @@ -287,8 +275,7 @@ public static CheckAndUploadResult checkAndUploadMultiModalMessage( public static boolean checkAndUploadMultiModalMessage( String model, Map.Entry entry, String apiKey) throws NoApiKeyException, UploadFileException { - CheckAndUploadResult result = checkAndUploadMultiModalMessage(model, - entry, apiKey, null); + CheckAndUploadResult result = checkAndUploadMultiModalMessage(model, entry, apiKey, null); return result.isUpload(); } @@ -304,8 +291,7 @@ public static boolean checkAndUploadMultiModalMessage( * @throws UploadFileException If upload fails */ public static PreprocessResult preProcessMultiModalMessageInputs( - String model, MultiModalMessage messages, String apiKey, - OSSUploadCertificate certificate) + String model, MultiModalMessage messages, String apiKey, OSSUploadCertificate certificate) throws NoApiKeyException, UploadFileException { boolean hasUpload = false; OSSUploadCertificate cert = certificate; @@ -316,8 +302,7 @@ public static PreprocessResult preProcessMultiModalMessageInputs( } for (Map item : content) { for (Map.Entry entry : item.entrySet()) { - CheckAndUploadResult result = checkAndUploadMultiModalMessage( - model, entry, apiKey, cert); + CheckAndUploadResult result = checkAndUploadMultiModalMessage(model, entry, apiKey, cert); if (result.isUpload() && !hasUpload) { hasUpload = true; } @@ -329,9 +314,11 @@ public static PreprocessResult preProcessMultiModalMessageInputs( } public static PreprocessResult preProcessMultiModalMessageInputs( - String model, ImageGenerationMessage messages, String apiKey, - OSSUploadCertificate certificate) - throws NoApiKeyException, UploadFileException { + String model, + ImageGenerationMessage messages, + String apiKey, + OSSUploadCertificate certificate) + throws NoApiKeyException, UploadFileException { boolean hasUpload = false; OSSUploadCertificate cert = certificate; List> content = new ArrayList<>(); @@ -341,8 +328,7 @@ public static PreprocessResult preProcessMultiModalMessageInputs( } for (Map item : content) { for (Map.Entry entry : item.entrySet()) { - CheckAndUploadResult result = checkAndUploadMultiModalMessage( - model, entry, apiKey, cert); + CheckAndUploadResult result = checkAndUploadMultiModalMessage(model, entry, apiKey, cert); if (result.isUpload() && !hasUpload) { hasUpload = true; } @@ -366,20 +352,16 @@ public static PreprocessResult preProcessMultiModalMessageInputs( public static boolean preProcessMultiModalMessageInputs( String model, MultiModalMessage messages, String apiKey) throws NoApiKeyException, UploadFileException { - PreprocessResult result = preProcessMultiModalMessageInputs(model, - messages, apiKey, null); + PreprocessResult result = preProcessMultiModalMessageInputs(model, messages, apiKey, null); return result.hasUpload(); } - /** - * Result of check and upload operation. - */ + /** Result of check and upload operation. */ public static class CheckAndUploadResult { private boolean upload; private OSSUploadCertificate certificate; - public CheckAndUploadResult(boolean upload, - OSSUploadCertificate certificate) { + public CheckAndUploadResult(boolean upload, OSSUploadCertificate certificate) { this.upload = upload; this.certificate = certificate; } @@ -393,15 +375,12 @@ public OSSUploadCertificate getCertificate() { } } - /** - * Result of check and upload one operation. - */ + /** Result of check and upload one operation. */ public static class CheckAndUploadOneResult { private String fileUrl; private OSSUploadCertificate certificate; - public CheckAndUploadOneResult(String fileUrl, - OSSUploadCertificate certificate) { + public CheckAndUploadOneResult(String fileUrl, OSSUploadCertificate certificate) { this.fileUrl = fileUrl; this.certificate = certificate; } @@ -415,15 +394,12 @@ public OSSUploadCertificate getCertificate() { } } - /** - * Result of preprocess operation. - */ + /** Result of preprocess operation. */ public static class PreprocessResult { private boolean hasUpload; private OSSUploadCertificate certificate; - public PreprocessResult(boolean hasUpload, - OSSUploadCertificate certificate) { + public PreprocessResult(boolean hasUpload, OSSUploadCertificate certificate) { this.hasUpload = hasUpload; this.certificate = certificate; } diff --git a/src/main/java/com/alibaba/dashscope/utils/UploadResult.java b/src/main/java/com/alibaba/dashscope/utils/UploadResult.java index d3135ba..a22bc0f 100644 --- a/src/main/java/com/alibaba/dashscope/utils/UploadResult.java +++ b/src/main/java/com/alibaba/dashscope/utils/UploadResult.java @@ -2,9 +2,7 @@ import lombok.Data; -/** - * Result of file upload containing OSS URL and certificate. - */ +/** Result of file upload containing OSS URL and certificate. */ @Data public class UploadResult { private String ossUrl; @@ -21,4 +19,3 @@ public UploadResult(String ossUrl, OSSUploadCertificate certificate) { this.certificate = certificate; } } - diff --git a/src/test/java/com/alibaba/dashscope/TestImageSynthesis.java b/src/test/java/com/alibaba/dashscope/TestImageSynthesis.java index d0f4f4d..bc5db36 100644 --- a/src/test/java/com/alibaba/dashscope/TestImageSynthesis.java +++ b/src/test/java/com/alibaba/dashscope/TestImageSynthesis.java @@ -1,14 +1,16 @@ package com.alibaba.dashscope; +import static org.junit.Assert.assertEquals; + import com.alibaba.dashscope.aigc.imagesynthesis.ImageSynthesis; import com.alibaba.dashscope.aigc.imagesynthesis.ImageSynthesisParam; import com.alibaba.dashscope.aigc.imagesynthesis.ImageSynthesisResult; -import com.alibaba.dashscope.aigc.imagesynthesis.SketchImageSynthesisParam; import com.alibaba.dashscope.exception.ApiException; import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.utils.Constants; import com.alibaba.dashscope.utils.JsonUtils; +import java.io.IOException; import lombok.extern.slf4j.Slf4j; import okhttp3.MediaType; import okhttp3.mockwebserver.MockResponse; @@ -21,19 +23,15 @@ import org.junit.jupiter.api.parallel.ExecutionMode; import org.junitpioneer.jupiter.SetEnvironmentVariable; -import java.io.IOException; - -import static org.junit.Assert.assertEquals; - @Execution(ExecutionMode.SAME_THREAD) @Slf4j @SetEnvironmentVariable(key = "DASHSCOPE_API_KEY", value = "1234") public class TestImageSynthesis { private static final MediaType MEDIA_TYPE_APPLICATION_JSON = - MediaType.parse("application/json; charset=utf-8"); + MediaType.parse("application/json; charset=utf-8"); MockWebServer server; private String expectRequestBody = - "{\"model\":\"wanx2.1-imageedit\",\"input\":{\"prompt\":\"雄鹰自由自在的在蓝天白云下飞翔\",\"function\":\"description_edit_with_mask\",\"base_image_url\":\"https://www.xxx.cn/b.png\",\"mask_image_url\":\"https://www.xxx.cn/a.png\"},\"parameters\":{\"size\":\"1024*1024\",\"n\":4}}"; + "{\"model\":\"wanx2.1-imageedit\",\"input\":{\"prompt\":\"雄鹰自由自在的在蓝天白云下飞翔\",\"function\":\"description_edit_with_mask\",\"base_image_url\":\"https://www.xxx.cn/b.png\",\"mask_image_url\":\"https://www.xxx.cn/a.png\"},\"parameters\":{\"size\":\"1024*1024\",\"n\":4}}"; @BeforeEach public void before() { @@ -47,26 +45,26 @@ public void after() throws IOException { @Test public void testImageSynthesisNormal() - throws ApiException, NoApiKeyException, IOException, InterruptedException, + throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"results\":[{\"url\":\"https://1\"},{\"url\":\"https://2\"},{\"url\":\"https://\"},{\"url\":\"https://4\"}],\"task_metrics\":{\"TOTAL\":4,\"SUCCEEDED\":4,\"FAILED\":0}},\"usage\":{\"image_count\":4}}"; + "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"results\":[{\"url\":\"https://1\"},{\"url\":\"https://2\"},{\"url\":\"https://\"},{\"url\":\"https://4\"}],\"task_metrics\":{\"TOTAL\":4,\"SUCCEEDED\":4,\"FAILED\":0}},\"usage\":{\"image_count\":4}}"; server.enqueue( - new MockResponse() - .setBody(responseBody) - .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); + new MockResponse() + .setBody(responseBody) + .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); int port = server.getPort(); ImageSynthesis is = new ImageSynthesis(); ImageSynthesisParam param = - ImageSynthesisParam.builder() - .model(ImageSynthesis.Models.WANX_2_1_IMAGEEDIT) - .n(4) - .function("description_edit_with_mask") - .maskImageUrl("https://www.xxx.cn/a.png") - .baseImageUrl("https://www.xxx.cn/b.png") - .size("1024*1024") - .prompt("雄鹰自由自在的在蓝天白云下飞翔") - .build(); + ImageSynthesisParam.builder() + .model(ImageSynthesis.Models.WANX_2_1_IMAGEEDIT) + .n(4) + .function("description_edit_with_mask") + .maskImageUrl("https://www.xxx.cn/a.png") + .baseImageUrl("https://www.xxx.cn/b.png") + .size("1024*1024") + .prompt("雄鹰自由自在的在蓝天白云下飞翔") + .build(); Constants.baseHttpApiUrl = String.format("http://127.0.0.1:%s", port); ImageSynthesisResult result = is.asyncCall(param); String resultJson = JsonUtils.toJson(result); @@ -81,26 +79,26 @@ public void testImageSynthesisNormal() @Test public void testImageSynthesisUsageMore() - throws ApiException, NoApiKeyException, IOException, InterruptedException, + throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"results\":[{\"url\":\"https://1\"},{\"url\":\"https://2\"},{\"url\":\"https://\"},{\"url\":\"https://4\"}],\"task_metrics\":{\"TOTAL\":4,\"SUCCEEDED\":4,\"FAILED\":0}},\"usage\":{\"image_count\":4,\"size\":\"1024*1024\"}}"; + "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"results\":[{\"url\":\"https://1\"},{\"url\":\"https://2\"},{\"url\":\"https://\"},{\"url\":\"https://4\"}],\"task_metrics\":{\"TOTAL\":4,\"SUCCEEDED\":4,\"FAILED\":0}},\"usage\":{\"image_count\":4,\"size\":\"1024*1024\"}}"; server.enqueue( - new MockResponse() - .setBody(responseBody) - .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); + new MockResponse() + .setBody(responseBody) + .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); int port = server.getPort(); ImageSynthesis is = new ImageSynthesis(); ImageSynthesisParam param = - ImageSynthesisParam.builder() - .model(ImageSynthesis.Models.WANX_2_1_IMAGEEDIT) - .n(4) - .function("description_edit_with_mask") - .maskImageUrl("https://www.xxx.cn/a.png") - .baseImageUrl("https://www.xxx.cn/b.png") - .size("1024*1024") - .prompt("雄鹰自由自在的在蓝天白云下飞翔") - .build(); + ImageSynthesisParam.builder() + .model(ImageSynthesis.Models.WANX_2_1_IMAGEEDIT) + .n(4) + .function("description_edit_with_mask") + .maskImageUrl("https://www.xxx.cn/a.png") + .baseImageUrl("https://www.xxx.cn/b.png") + .size("1024*1024") + .prompt("雄鹰自由自在的在蓝天白云下飞翔") + .build(); Constants.baseHttpApiUrl = String.format("http://127.0.0.1:%s", port); ImageSynthesisResult result = is.asyncCall(param); String resultJson = JsonUtils.toJson(result); diff --git a/src/test/java/com/alibaba/dashscope/TestMultiModalConversationQwenTTS.java b/src/test/java/com/alibaba/dashscope/TestMultiModalConversationQwenTTS.java index a74cb6e..1d6c20b 100644 --- a/src/test/java/com/alibaba/dashscope/TestMultiModalConversationQwenTTS.java +++ b/src/test/java/com/alibaba/dashscope/TestMultiModalConversationQwenTTS.java @@ -2,27 +2,27 @@ package com.alibaba.dashscope; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + import com.alibaba.dashscope.aigc.multimodalconversation.*; import com.alibaba.dashscope.exception.ApiException; import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.exception.UploadFileException; import com.alibaba.dashscope.utils.Constants; +import java.io.IOException; import lombok.extern.slf4j.Slf4j; import okhttp3.MediaType; import okhttp3.mockwebserver.MockResponse; import okhttp3.mockwebserver.MockWebServer; import okhttp3.mockwebserver.RecordedRequest; -import okio.ByteString; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; import org.junitpioneer.jupiter.SetEnvironmentVariable; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import java.io.IOException; @Execution(ExecutionMode.SAME_THREAD) @Slf4j @@ -39,11 +39,11 @@ public void before() throws IOException { this.server.start(); String responseStr = - "{\"output\": {\"audio\": {\"data\": \"\", \"expires_at\": 1758187426, \"id\": \"audio_d8ab01f8-2793-4f65-a656-664e6e6c0d19\", \"url\": \"http://dashscope-result.demo.reuslt/abc\"}, \"finish_reason\": \"stop\"}, \"usage\": {\"characters\": 56}, \"request_id\": \"d8ab01f8-2793-4f65-a656-664e6e6c0d19\"}"; + "{\"output\": {\"audio\": {\"data\": \"\", \"expires_at\": 1758187426, \"id\": \"audio_d8ab01f8-2793-4f65-a656-664e6e6c0d19\", \"url\": \"http://dashscope-result.demo.reuslt/abc\"}, \"finish_reason\": \"stop\"}, \"usage\": {\"characters\": 56}, \"request_id\": \"d8ab01f8-2793-4f65-a656-664e6e6c0d19\"}"; server.enqueue( - new MockResponse() - .setBody(responseStr) - .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); + new MockResponse() + .setBody(responseStr) + .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); } @AfterEach @@ -62,11 +62,11 @@ public void testSendAndReceive() MultiModalConversationParam param = MultiModalConversationParam.builder() - .model("qwen-tts-latest") - .text("Today is a wonderful day to build something people love!") - .voice(AudioParameters.Voice.DYLAN) - .languageType("zh") - .build(); + .model("qwen-tts-latest") + .text("Today is a wonderful day to build something people love!") + .voice(AudioParameters.Voice.DYLAN) + .languageType("zh") + .build(); MultiModalConversationResult result = conv.call(param); RecordedRequest request = this.server.takeRequest(); String requestBody = request.getBody().readUtf8(); diff --git a/src/test/java/com/alibaba/dashscope/TestMultimodalDialog.java b/src/test/java/com/alibaba/dashscope/TestMultimodalDialog.java index 151e88e..05629e2 100644 --- a/src/test/java/com/alibaba/dashscope/TestMultimodalDialog.java +++ b/src/test/java/com/alibaba/dashscope/TestMultimodalDialog.java @@ -2,6 +2,9 @@ package com.alibaba.dashscope; +import static java.lang.Thread.sleep; +import static org.junit.Assert.assertThrows; + import com.alibaba.dashscope.multimodal.MultiModalDialog; import com.alibaba.dashscope.multimodal.MultiModalDialogCallback; import com.alibaba.dashscope.multimodal.MultiModalRequestParam; @@ -11,6 +14,8 @@ import com.google.gson.Gson; import com.google.gson.JsonArray; import com.google.gson.JsonObject; +import java.io.IOException; +import java.nio.ByteBuffer; import lombok.extern.slf4j.Slf4j; import okhttp3.Response; import okhttp3.WebSocket; @@ -24,13 +29,6 @@ import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; - -import static java.lang.Thread.sleep; -import static org.junit.Assert.assertThrows; - @Execution(ExecutionMode.SAME_THREAD) @Slf4j public class TestMultimodalDialog { @@ -40,7 +38,7 @@ public class TestMultimodalDialog { private static MultiModalDialog multiModalDialog; private static MockWebServer mockServer; private static State.DialogState currentState; - static int enterListeningTimes = 0; + static int enterListeningTimes = 0; @BeforeAll public static void before() throws IOException { @@ -72,10 +70,10 @@ public void onMessage(WebSocket webSocket, String string) { "{'header': {'task_id': '" + task_id + "', 'event': 'task-started', 'attributes': {}}, 'payload': {'output':{'event':'Started','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); - webSocket.send( - "{'header': {'event': 'result-generated', 'task_id': '" - + task_id - + "', 'attributes': {}}, 'payload': {'output':{'event':'DialogStateChanged','state':'Listening','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); + webSocket.send( + "{'header': {'event': 'result-generated', 'task_id': '" + + task_id + + "', 'attributes': {}}, 'payload': {'output':{'event':'DialogStateChanged','state':'Listening','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); } else if (string.contains("finish-task")) { webSocket.send( @@ -84,45 +82,40 @@ public void onMessage(WebSocket webSocket, String string) { + "', 'event': 'task-finished', 'attributes': {}}, 'payload': {'output': {'event':'stopped'}, 'usage': {'characters': 7}}}"); webSocket.close(1000, "close by server"); } else if (string.contains("continue-task")) { - if (string.contains("prompt")) { - webSocket.send( - "{'header': {'event': 'result-generated','task_id': '" - + task_id - + "', 'attributes': {}}, 'payload': {'output':{'event':'DialogStateChanged','state':'Thinking','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); - - webSocket.send( - "{'header': {'event': 'result-generated','task_id': '" - + task_id - + "', 'attributes': {}}, 'payload': {'output':{'event':'DialogStateChanged','state':'Responding','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); - - webSocket.send( - "{'header': {'event': 'result-generated','task_id': '" - + task_id - + "', 'attributes': {}}, 'payload': {'output':{'event':'RespondingStarted','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); - - - } - - if (string.contains("LocalRespondingStarted")) { - byte[] binary = new byte[] {0x01, 0x01, 0x01}; - webSocket.send(new ByteString(binary)); + if (string.contains("prompt")) { + webSocket.send( + "{'header': {'event': 'result-generated','task_id': '" + + task_id + + "', 'attributes': {}}, 'payload': {'output':{'event':'DialogStateChanged','state':'Thinking','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); - webSocket.send( - "{'header': {'event': 'result-generated','task_id': '" - + task_id - + "', 'attributes': {}}, 'payload': {'output':{'event':'RespondingEnded','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); - } + webSocket.send( + "{'header': {'event': 'result-generated','task_id': '" + + task_id + + "', 'attributes': {}}, 'payload': {'output':{'event':'DialogStateChanged','state':'Responding','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); + webSocket.send( + "{'header': {'event': 'result-generated','task_id': '" + + task_id + + "', 'attributes': {}}, 'payload': {'output':{'event':'RespondingStarted','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); + } - if (string.contains("LocalRespondingEnded")) { - //重新切换到Listening状态 - webSocket.send( - "{'header': {'event': 'result-generated','task_id': '" - + task_id - + "', 'attributes': {}}, 'payload': {'output':{'event':'DialogStateChanged','state':'Listening','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); + if (string.contains("LocalRespondingStarted")) { + byte[] binary = new byte[] {0x01, 0x01, 0x01}; + webSocket.send(new ByteString(binary)); - } + webSocket.send( + "{'header': {'event': 'result-generated','task_id': '" + + task_id + + "', 'attributes': {}}, 'payload': {'output':{'event':'RespondingEnded','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); + } + if (string.contains("LocalRespondingEnded")) { + // 重新切换到Listening状态 + webSocket.send( + "{'header': {'event': 'result-generated','task_id': '" + + task_id + + "', 'attributes': {}}, 'payload': {'output':{'event':'DialogStateChanged','state':'Listening','dialog_id':'bfce4b14-32d8-48f1-b30d-c98789f319be'}}}"); + } } } }); @@ -146,143 +139,157 @@ public void testDialog() throws InterruptedException { // 在真实世界中,你会在这里做 HTTP 请求,并得到响应 System.out.println("Mock Server is running at: " + url); - MultiModalRequestParam params = - MultiModalRequestParam.builder() - .customInput( - MultiModalRequestParam.CustomInput.builder() - .workspaceId(workSpaceId) - .appId(appId) - .build()) - .upStream( - MultiModalRequestParam.UpStream.builder() - .mode("push2talk") - .audioFormat("pcm") - .build()) - .downStream( - MultiModalRequestParam.DownStream.builder() - .voice("longxiaochun_v2") - .sampleRate(48000) - .build()) - .clientInfo( - MultiModalRequestParam.ClientInfo.builder() - .userId("1234") - .device(MultiModalRequestParam.ClientInfo.Device.builder().uuid("device_1234").build()) - .build()) - .model(modelName) - .apiKey("api_key") - .build(); - multiModalDialog = new MultiModalDialog(params, getCallback()); - multiModalDialog.start(); - - while (currentState != State.DialogState.LISTENING) { - try { - sleep(100); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - // 模拟语音请求 - multiModalDialog.requestToRespond("prompt","讲个故事",null); - // 增加交互流程等待 - while (enterListeningTimes < 2) { - try { - sleep(2000); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + MultiModalRequestParam params = + MultiModalRequestParam.builder() + .customInput( + MultiModalRequestParam.CustomInput.builder() + .workspaceId(workSpaceId) + .appId(appId) + .build()) + .upStream( + MultiModalRequestParam.UpStream.builder() + .mode("push2talk") + .audioFormat("pcm") + .build()) + .downStream( + MultiModalRequestParam.DownStream.builder() + .voice("longxiaochun_v2") + .sampleRate(48000) + .build()) + .clientInfo( + MultiModalRequestParam.ClientInfo.builder() + .userId("1234") + .device( + MultiModalRequestParam.ClientInfo.Device.builder() + .uuid("device_1234") + .build()) + .build()) + .model(modelName) + .apiKey("api_key") + .build(); + multiModalDialog = new MultiModalDialog(params, getCallback()); + multiModalDialog.start(); + + while (currentState != State.DialogState.LISTENING) { + try { + sleep(100); + } catch (InterruptedException e) { + throw new RuntimeException(e); } - multiModalDialog.stop(); + } + // 模拟语音请求 + multiModalDialog.requestToRespond("prompt", "讲个故事", null); + // 增加交互流程等待 + while (enterListeningTimes < 2) { try { - sleep(1000); + sleep(2000); } catch (InterruptedException e) { - throw new RuntimeException(e); + throw new RuntimeException(e); } + } + multiModalDialog.stop(); + try { + sleep(1000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } System.out.println("############ Start Test Dialog Done ############"); } + public static MultiModalDialogCallback getCallback() { + return new MultiModalDialogCallbackImpl(); + } + + public static class MultiModalDialogCallbackImpl extends MultiModalDialogCallback { + @Override + public void onConnected() {} - public static MultiModalDialogCallback getCallback() { - return new MultiModalDialogCallbackImpl(); + @Override + public void onStarted(String dialogId) { + log.info("onStarted: {}", dialogId); } - public static class MultiModalDialogCallbackImpl extends MultiModalDialogCallback { - @Override - public void onConnected() {} - @Override - public void onStarted(String dialogId) { - log.info("onStarted: {}", dialogId); - } - @Override - public void onStopped(String dialogId) { - log.info("onStopped: {}", dialogId); - } - @Override - public void onSpeechStarted(String dialogId) { - log.info("onSpeechStarted: {}", dialogId); - } - @Override - public void onSpeechEnded(String dialogId) { - log.info("onSpeechEnded: {}", dialogId); - } - @Override - public void onError(String dialogId, String errorCode, String errorMsg) { - log.error("onError: {}, {}, {}", dialogId, errorCode, errorMsg); - assertThrows(RuntimeException.class, () -> { - throw new RuntimeException(errorMsg); - }); - } - @Override - public void onStateChanged(State.DialogState state) { - currentState = state; - log.info("onStateChanged: {}", state); - if (currentState == State.DialogState.LISTENING) { - enterListeningTimes++; - log.info("enterListeningTimes: {}", enterListeningTimes); - } - } - @Override - public void onSpeechAudioData(ByteBuffer audioData) { - //write audio data to file - //or redirect to audio player - } - @Override - public void onRespondingStarted(String dialogId) { - log.info("onRespondingStarted: {}", dialogId); - multiModalDialog.localRespondingStarted(); - } + @Override + public void onStopped(String dialogId) { + log.info("onStopped: {}", dialogId); + } - @Override - public void onRespondingEnded(String dialogId, JsonObject content) { - log.info("onRespondingEnded: {}", dialogId); - multiModalDialog.localRespondingEnded(); - } + @Override + public void onSpeechStarted(String dialogId) { + log.info("onSpeechStarted: {}", dialogId); + } + @Override + public void onSpeechEnded(String dialogId) { + log.info("onSpeechEnded: {}", dialogId); + } - @Override - public void onRespondingContent(String dialogId, JsonObject content) { - log.info("onRespondingContent: {}, {}", dialogId, content); - if (content.has("extra_info")) { - JsonObject extraInfo = content.getAsJsonObject("extra_info"); - if (extraInfo.has("commands")) { - String commandsStr = extraInfo.get("commands").getAsString(); - log.info("commandsStr: {}", commandsStr); - //"[{\"name\":\"visual_qa\",\"params\":[{\"name\":\"shot\",\"value\":\"拍照看看\",\"normValue\":\"True\"}]}]" - JsonArray commands = new Gson().fromJson(commandsStr, JsonArray.class); - } - } - } - @Override - public void onSpeechContent(String dialogId, JsonObject content) { - log.info("onSpeechContent: {}, {}", dialogId, content); - } - @Override - public void onRequestAccepted(String dialogId) { - log.info("onRequestAccepted: {}", dialogId); - } - @Override - public void onClosed() { - log.info("onClosed"); + @Override + public void onError(String dialogId, String errorCode, String errorMsg) { + log.error("onError: {}, {}, {}", dialogId, errorCode, errorMsg); + assertThrows( + RuntimeException.class, + () -> { + throw new RuntimeException(errorMsg); + }); + } + + @Override + public void onStateChanged(State.DialogState state) { + currentState = state; + log.info("onStateChanged: {}", state); + if (currentState == State.DialogState.LISTENING) { + enterListeningTimes++; + log.info("enterListeningTimes: {}", enterListeningTimes); + } + } + + @Override + public void onSpeechAudioData(ByteBuffer audioData) { + // write audio data to file + // or redirect to audio player + } + + @Override + public void onRespondingStarted(String dialogId) { + log.info("onRespondingStarted: {}", dialogId); + multiModalDialog.localRespondingStarted(); + } + + @Override + public void onRespondingEnded(String dialogId, JsonObject content) { + log.info("onRespondingEnded: {}", dialogId); + multiModalDialog.localRespondingEnded(); + } + + @Override + public void onRespondingContent(String dialogId, JsonObject content) { + log.info("onRespondingContent: {}, {}", dialogId, content); + if (content.has("extra_info")) { + JsonObject extraInfo = content.getAsJsonObject("extra_info"); + if (extraInfo.has("commands")) { + String commandsStr = extraInfo.get("commands").getAsString(); + log.info("commandsStr: {}", commandsStr); + // "[{\"name\":\"visual_qa\",\"params\":[{\"name\":\"shot\",\"value\":\"拍照看看\",\"normValue\":\"True\"}]}]" + JsonArray commands = new Gson().fromJson(commandsStr, JsonArray.class); } + } + } + + @Override + public void onSpeechContent(String dialogId, JsonObject content) { + log.info("onSpeechContent: {}, {}", dialogId, content); + } + + @Override + public void onRequestAccepted(String dialogId) { + log.info("onRequestAccepted: {}", dialogId); + } + + @Override + public void onClosed() { + log.info("onClosed"); } + } } diff --git a/src/test/java/com/alibaba/dashscope/TestQwenTtsRealtime.java b/src/test/java/com/alibaba/dashscope/TestQwenTtsRealtime.java index eeb7120..3ac8cae 100644 --- a/src/test/java/com/alibaba/dashscope/TestQwenTtsRealtime.java +++ b/src/test/java/com/alibaba/dashscope/TestQwenTtsRealtime.java @@ -1,5 +1,7 @@ package com.alibaba.dashscope; +import static org.junit.jupiter.api.Assertions.assertEquals; + import com.alibaba.dashscope.audio.qwen_tts_realtime.QwenTtsRealtime; import com.alibaba.dashscope.audio.qwen_tts_realtime.QwenTtsRealtimeCallback; import com.alibaba.dashscope.audio.qwen_tts_realtime.QwenTtsRealtimeConfig; @@ -7,6 +9,9 @@ import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.utils.JsonUtils; import com.google.gson.JsonObject; +import java.io.IOException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicReference; import lombok.extern.slf4j.Slf4j; import okhttp3.Response; import okhttp3.WebSocket; @@ -21,124 +26,114 @@ import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; -import java.io.IOException; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicReference; - -import static org.junit.jupiter.api.Assertions.assertEquals; - @Execution(ExecutionMode.SAME_THREAD) @Slf4j public class TestQwenTtsRealtime { - private static MockWebServer mockServer; - - @BeforeAll - public static void before() throws IOException { - mockServer = new MockWebServer(); - mockServer.start(); - MockResponse response = - new MockResponse() - .withWebSocketUpgrade( - new WebSocketListener() { - String task_id = ""; - - @Override - public void onOpen(WebSocket webSocket, Response response) { - System.out.println("Mock Server onOpen"); - System.out.println( - "Mock Server request header:" + response.request().headers()); - System.out.println("Mock Server response header:" + response.headers()); - System.out.println("Mock Server response:" + response); - webSocket.send( - "{\"event_id\":\"event_RiXR49wQsjYDGIdFZZvwV\",\"type\":\"session.created\",\"session\":{\"object\":\"realtime.session\",\"mode\":\"server_commit\",\"model\":\"qwen-tts-realtime\",\"voice\":\"Cherry\",\"response_format\":\"pcm\",\"sample_rate\":24000,\"id\":\"sess_Wf61YVPaIA3d1bPQTyWqE\"}}"); - } - - @Override - public void onMessage(WebSocket webSocket, String string) { - System.out.println("mock server recv: " + string); - JsonObject req = JsonUtils.parse(string); - if (string.contains("input_text_buffer.commit")) { - webSocket.send( - "{\"event_id\":\"event_B7p1sjr6AY4OTH2dVhr43\",\"type\":\"input_text_buffer.committed\",\"item_id\":\"\"}"); - webSocket.send( - "{\"event_id\":\"event_EnRZHRRqOOpNZnj56onql\",\"type\":\"response.audio.delta\",\"response_id\":\"resp_Hk9RCaeoY9bEA9aHoenUD\",\"item_id\":\"item_QlDl3OxhiOOv5ZGVRcL5K\",\"output_index\":0,\"content_index\":0,\"delta\":\"xxxx\"}"); - } - } - - @Override - public void onFailure( - @NotNull WebSocket webSocket, - @NotNull Throwable t, - @Nullable Response response) { - super.onFailure(webSocket, t, response); - t.printStackTrace(); - System.out.println("Mock Server onFailure" + t.getMessage()); - } - }); - mockServer.enqueue(response); - } - - @AfterAll - public static void after() throws IOException { - System.out.println("Mock Server is closed"); - } - - @Test - public void testQwenTtsRealtime() throws NoApiKeyException, InterruptedException, IOException { - System.out.println("############ Start Test Qwen Tts Realtime ############"); - int port = mockServer.getPort(); - // 获取 URL - String url = mockServer.url("/binary").toString(); - - // 在真实世界中,你会在这里做 HTTP 请求,并得到响应 - System.out.println("Mock Server is running at: " + url); - QwenTtsRealtimeParam param = - QwenTtsRealtimeParam.builder() - .model("qwen-tts-realtime") - .apikey("1234") - .url(String.format("http://127.0.0.1:%s", port)) - .build(); - final String[] audio = {"wrong text"}; - - AtomicReference textLatch = new AtomicReference<>(null); - textLatch.set(new CountDownLatch(1)); - - QwenTtsRealtime ttsRealtime = - new QwenTtsRealtime( - param, - new QwenTtsRealtimeCallback() { - @Override - public void onOpen() { - super.onOpen(); - } - - @Override - public void onEvent(JsonObject message) { - System.out.println("onEvent:" + message); - if (message.get("type").getAsString().equals("response.audio.delta")) { - audio[0] = message.get("delta").getAsString(); - textLatch.get().countDown(); - } - } - - @Override - public void onClose(int code, String reason) { - System.out.println("onClose:" + code + ", " + reason); - } - }); - - ttsRealtime.connect(); - QwenTtsRealtimeConfig config = - QwenTtsRealtimeConfig.builder() - .voice("Chelsie") - .mode("commit") - .languageType("zh") - .build(); - ttsRealtime.updateSession(config); - ttsRealtime.appendText("你好"); - ttsRealtime.commit(); - textLatch.get().await(1000, java.util.concurrent.TimeUnit.MILLISECONDS); - assertEquals("xxxx", audio[0]); - ttsRealtime.close(); - } + private static MockWebServer mockServer; + + @BeforeAll + public static void before() throws IOException { + mockServer = new MockWebServer(); + mockServer.start(); + MockResponse response = + new MockResponse() + .withWebSocketUpgrade( + new WebSocketListener() { + String task_id = ""; + + @Override + public void onOpen(WebSocket webSocket, Response response) { + System.out.println("Mock Server onOpen"); + System.out.println( + "Mock Server request header:" + response.request().headers()); + System.out.println("Mock Server response header:" + response.headers()); + System.out.println("Mock Server response:" + response); + webSocket.send( + "{\"event_id\":\"event_RiXR49wQsjYDGIdFZZvwV\",\"type\":\"session.created\",\"session\":{\"object\":\"realtime.session\",\"mode\":\"server_commit\",\"model\":\"qwen-tts-realtime\",\"voice\":\"Cherry\",\"response_format\":\"pcm\",\"sample_rate\":24000,\"id\":\"sess_Wf61YVPaIA3d1bPQTyWqE\"}}"); + } + + @Override + public void onMessage(WebSocket webSocket, String string) { + System.out.println("mock server recv: " + string); + JsonObject req = JsonUtils.parse(string); + if (string.contains("input_text_buffer.commit")) { + webSocket.send( + "{\"event_id\":\"event_B7p1sjr6AY4OTH2dVhr43\",\"type\":\"input_text_buffer.committed\",\"item_id\":\"\"}"); + webSocket.send( + "{\"event_id\":\"event_EnRZHRRqOOpNZnj56onql\",\"type\":\"response.audio.delta\",\"response_id\":\"resp_Hk9RCaeoY9bEA9aHoenUD\",\"item_id\":\"item_QlDl3OxhiOOv5ZGVRcL5K\",\"output_index\":0,\"content_index\":0,\"delta\":\"xxxx\"}"); + } + } + + @Override + public void onFailure( + @NotNull WebSocket webSocket, + @NotNull Throwable t, + @Nullable Response response) { + super.onFailure(webSocket, t, response); + t.printStackTrace(); + System.out.println("Mock Server onFailure" + t.getMessage()); + } + }); + mockServer.enqueue(response); + } + + @AfterAll + public static void after() throws IOException { + System.out.println("Mock Server is closed"); + } + + @Test + public void testQwenTtsRealtime() throws NoApiKeyException, InterruptedException, IOException { + System.out.println("############ Start Test Qwen Tts Realtime ############"); + int port = mockServer.getPort(); + // 获取 URL + String url = mockServer.url("/binary").toString(); + + // 在真实世界中,你会在这里做 HTTP 请求,并得到响应 + System.out.println("Mock Server is running at: " + url); + QwenTtsRealtimeParam param = + QwenTtsRealtimeParam.builder() + .model("qwen-tts-realtime") + .apikey("1234") + .url(String.format("http://127.0.0.1:%s", port)) + .build(); + final String[] audio = {"wrong text"}; + + AtomicReference textLatch = new AtomicReference<>(null); + textLatch.set(new CountDownLatch(1)); + + QwenTtsRealtime ttsRealtime = + new QwenTtsRealtime( + param, + new QwenTtsRealtimeCallback() { + @Override + public void onOpen() { + super.onOpen(); + } + + @Override + public void onEvent(JsonObject message) { + System.out.println("onEvent:" + message); + if (message.get("type").getAsString().equals("response.audio.delta")) { + audio[0] = message.get("delta").getAsString(); + textLatch.get().countDown(); + } + } + + @Override + public void onClose(int code, String reason) { + System.out.println("onClose:" + code + ", " + reason); + } + }); + + ttsRealtime.connect(); + QwenTtsRealtimeConfig config = + QwenTtsRealtimeConfig.builder().voice("Chelsie").mode("commit").languageType("zh").build(); + ttsRealtime.updateSession(config); + ttsRealtime.appendText("你好"); + ttsRealtime.commit(); + textLatch.get().await(1000, java.util.concurrent.TimeUnit.MILLISECONDS); + assertEquals("xxxx", audio[0]); + ttsRealtime.close(); + } } diff --git a/src/test/java/com/alibaba/dashscope/TestVideoSynthesis.java b/src/test/java/com/alibaba/dashscope/TestVideoSynthesis.java index 9a55102..4d92b85 100644 --- a/src/test/java/com/alibaba/dashscope/TestVideoSynthesis.java +++ b/src/test/java/com/alibaba/dashscope/TestVideoSynthesis.java @@ -8,6 +8,7 @@ import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.utils.Constants; import com.alibaba.dashscope.utils.JsonUtils; +import java.io.IOException; import lombok.extern.slf4j.Slf4j; import okhttp3.MediaType; import okhttp3.mockwebserver.MockResponse; @@ -21,14 +22,12 @@ import org.junit.jupiter.api.parallel.ExecutionMode; import org.junitpioneer.jupiter.SetEnvironmentVariable; -import java.io.IOException; - @Execution(ExecutionMode.SAME_THREAD) @Slf4j @SetEnvironmentVariable(key = "DASHSCOPE_API_KEY", value = "1234") public class TestVideoSynthesis { private static final MediaType MEDIA_TYPE_APPLICATION_JSON = - MediaType.parse("application/json; charset=utf-8"); + MediaType.parse("application/json; charset=utf-8"); MockWebServer server; @BeforeEach @@ -43,23 +42,23 @@ public void after() throws IOException { @Test public void testVideoSynthesisNormal() - throws ApiException, NoApiKeyException, IOException, InterruptedException, + throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"video_url\":\"https://1\"},\"usage\":{\"video_count\":1}}"; + "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"video_url\":\"https://1\"},\"usage\":{\"video_count\":1}}"; assert MEDIA_TYPE_APPLICATION_JSON != null; server.enqueue( - new MockResponse() - .setBody(responseBody) - .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); + new MockResponse() + .setBody(responseBody) + .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); int port = server.getPort(); VideoSynthesis is = new VideoSynthesis(); VideoSynthesisParam param = - VideoSynthesisParam.builder() - .model(VideoSynthesis.Models.WANX_KF2V) - .firstFrameUrl("https://www.xxx.cn/a.png") - .lastFrameUrl("https://www.xxx.cn/b.png") - .build(); + VideoSynthesisParam.builder() + .model(VideoSynthesis.Models.WANX_KF2V) + .firstFrameUrl("https://www.xxx.cn/a.png") + .lastFrameUrl("https://www.xxx.cn/b.png") + .build(); Constants.baseHttpApiUrl = String.format("http://127.0.0.1:%s", port); VideoSynthesisResult result = is.asyncCall(param); String resultJson = JsonUtils.toJson(result); @@ -71,31 +70,31 @@ public void testVideoSynthesisNormal() String requestBody = request.getBody().readUtf8(); System.out.println(requestBody); String expectRequestBody = - "{\"model\":\"wanx-kf2v\",\"input\":{\"extend_prompt\":true,\"first_frame_url\":\"https://www.xxx.cn/a.png\",\"last_frame_url\":\"https://www.xxx.cn/b.png\"},\"parameters\":{\"duration\":5,\"with_audio\":false,\"size\":\"1280*720\",\"resolution\":\"720P\"}}"; + "{\"model\":\"wanx-kf2v\",\"input\":{\"extend_prompt\":true,\"first_frame_url\":\"https://www.xxx.cn/a.png\",\"last_frame_url\":\"https://www.xxx.cn/b.png\"},\"parameters\":{\"duration\":5,\"with_audio\":false,\"size\":\"1280*720\",\"resolution\":\"720P\"}}"; Assertions.assertEquals(expectRequestBody, requestBody); } @Test public void testVideoSynthesisUsageMore() - throws ApiException, NoApiKeyException, IOException, InterruptedException, + throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"video_url\":\"https://1\"},\"usage\":{\"video_count\":1}}"; + "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"video_url\":\"https://1\"},\"usage\":{\"video_count\":1}}"; assert MEDIA_TYPE_APPLICATION_JSON != null; server.enqueue( - new MockResponse() - .setBody(responseBody) - .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); + new MockResponse() + .setBody(responseBody) + .setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON)); int port = server.getPort(); VideoSynthesis is = new VideoSynthesis(); VideoSynthesisParam param = - VideoSynthesisParam.builder() - .model(VideoSynthesis.Models.WANX_KF2V) - .firstFrameUrl("https://www.xxx.cn/a.png") - .lastFrameUrl("https://www.xxx.cn/b.png") - .duration(4) - .seed(1234) - .build(); + VideoSynthesisParam.builder() + .model(VideoSynthesis.Models.WANX_KF2V) + .firstFrameUrl("https://www.xxx.cn/a.png") + .lastFrameUrl("https://www.xxx.cn/b.png") + .duration(4) + .seed(1234) + .build(); Constants.baseHttpApiUrl = String.format("http://127.0.0.1:%s", port); VideoSynthesisResult result = is.asyncCall(param); String resultJson = JsonUtils.toJson(result); @@ -106,7 +105,7 @@ public void testVideoSynthesisUsageMore() String requestBody = request.getBody().readUtf8(); System.out.println(requestBody); String expectRequestBody = - "{\"model\":\"wanx-kf2v\",\"input\":{\"extend_prompt\":true,\"first_frame_url\":\"https://www.xxx.cn/a.png\",\"last_frame_url\":\"https://www.xxx.cn/b.png\"},\"parameters\":{\"duration\":4,\"with_audio\":false,\"size\":\"1280*720\",\"seed\":1234,\"resolution\":\"720P\"}}"; + "{\"model\":\"wanx-kf2v\",\"input\":{\"extend_prompt\":true,\"first_frame_url\":\"https://www.xxx.cn/a.png\",\"last_frame_url\":\"https://www.xxx.cn/b.png\"},\"parameters\":{\"duration\":4,\"with_audio\":false,\"size\":\"1280*720\",\"seed\":1234,\"resolution\":\"720P\"}}"; Assertions.assertEquals(expectRequestBody, requestBody); } } From 91ccc0d973dbc82c25fd1bbf8d576eea1e663434 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Thu, 22 Jan 2026 17:29:12 +0800 Subject: [PATCH 49/64] feat(ci): add build_test action --- .github/workflows/build-and-test.yml | 109 ++++++++++++++++++ .../com/alibaba/dashscope/TestApiKey.java | 12 +- .../dashscope/TestBatchTextEmbedding.java | 8 +- .../dashscope/TestHalfDuplexHttpApi.java | 2 +- .../alibaba/dashscope/TestImageSynthesis.java | 2 +- .../alibaba/dashscope/TestVideoSynthesis.java | 6 +- 6 files changed, 129 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/build-and-test.yml diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml new file mode 100644 index 0000000..b4a6d79 --- /dev/null +++ b/.github/workflows/build-and-test.yml @@ -0,0 +1,109 @@ +# +# Copyright 2024-2026 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +name: 🛠️ Build and Test +on: + push: + branches: + - main + - master + paths-ignore: + - '**.md' + pull_request: + branches: + - main + - master + paths-ignore: + - '**.md' + workflow_dispatch: # Allow manual trigger + +env: + JAVA_VERSION: '8' + JAVA_DISTRIBUTION: 'temurin' + +permissions: + contents: read + +jobs: + checkstyle: + runs-on: ubuntu-22.04 + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK ${{ env.JAVA_VERSION }} + uses: actions/setup-java@v4 + with: + java-version: ${{ env.JAVA_VERSION }} + distribution: ${{ env.JAVA_DISTRIBUTION }} + cache: 'maven' + + - name: Run checkstyle + run: | + if [ -f lint.sh ]; then + chmod +x lint.sh + ./lint.sh + else + echo "No lint.sh found, skipping checkstyle" + fi + + test: + runs-on: ubuntu-22.04 + needs: [checkstyle] + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK ${{ env.JAVA_VERSION }} + uses: actions/setup-java@v4 + with: + java-version: ${{ env.JAVA_VERSION }} + distribution: ${{ env.JAVA_DISTRIBUTION }} + cache: 'maven' + + - name: Run tests + run: mvn clean test + env: + DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results + path: target/surefire-reports/ + + build: + runs-on: ubuntu-22.04 + needs: [checkstyle, test] + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK ${{ env.JAVA_VERSION }} + uses: actions/setup-java@v4 + with: + java-version: ${{ env.JAVA_VERSION }} + distribution: ${{ env.JAVA_DISTRIBUTION }} + cache: 'maven' + + - name: Build project + run: mvn clean package -DskipTests + + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: build-artifacts + path: target/*.jar diff --git a/src/test/java/com/alibaba/dashscope/TestApiKey.java b/src/test/java/com/alibaba/dashscope/TestApiKey.java index c484fa1..395b614 100644 --- a/src/test/java/com/alibaba/dashscope/TestApiKey.java +++ b/src/test/java/com/alibaba/dashscope/TestApiKey.java @@ -29,8 +29,9 @@ public class TestApiKey { @Test @SetEnvironmentVariable(key = "DASHSCOPE_API_KEY", value = environmentValue) public void testSetWithEnvValue() throws NoApiKeyException { + Constants.apiKey = null; // Clear any previously set API key String apiKey = ApiKey.getApiKey(null); - assertEquals(apiKey, environmentValue); + assertEquals(environmentValue, apiKey); } @Test @@ -49,9 +50,16 @@ public void testSetWithConstants() throws NoApiKeyException { @Test public void testWithDefaultFile() throws NoApiKeyException, IOException { + Constants.apiKey = null; // Clear any previously set API key Path homePath = Paths.get(System.getProperty("user.home")); Path dashscopePath = homePath.resolve(".dashscope").resolve("api_key"); String expectedValue = "4444"; + // Delete if exists before creating + if (Files.exists(dashscopePath.getParent()) && Files.isDirectory(dashscopePath.getParent())) { + if (Files.exists(dashscopePath)) { + Files.delete(dashscopePath); + } + } Files.createDirectories(dashscopePath.getParent()); Files.write( dashscopePath, @@ -65,7 +73,9 @@ public void testWithDefaultFile() throws NoApiKeyException, IOException { @Test @SetEnvironmentVariable(key = "DASHSCOPE_API_KEY_FILE_PATH", value = environmentPath) + @ClearEnvironmentVariable(key = "DASHSCOPE_API_KEY") public void testWithEnvFile() throws NoApiKeyException, IOException { + Constants.apiKey = null; // Clear any previously set API key String expectedValue = "555"; Files.write( Paths.get(environmentPath), diff --git a/src/test/java/com/alibaba/dashscope/TestBatchTextEmbedding.java b/src/test/java/com/alibaba/dashscope/TestBatchTextEmbedding.java index a8c613a..de447e7 100644 --- a/src/test/java/com/alibaba/dashscope/TestBatchTextEmbedding.java +++ b/src/test/java/com/alibaba/dashscope/TestBatchTextEmbedding.java @@ -52,7 +52,7 @@ public void testCreateAsyncTask() throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"78a74ba9-b8eb-9ca5-ab34-5a56f453cf03\",\"output\":{\"task_id\":\"2a1d8589-7148-422a-b9e7-f41682f07160\",\"task_status\":\"PENDING\"}}"; + "{\"request_id\":\"78a74ba9-b8eb-9ca5-ab34-5a56f453cf03\",\"output\":{\"task_id\":\"2a1d8589-7148-422a-b9e7-f41682f07160\",\"task_status\":\"PENDING\"},\"status_code\":200,\"code\":\"\",\"message\":\"\"}"; server.enqueue( new MockResponse() .setBody(responseBody) @@ -81,7 +81,7 @@ public void testFetchTask() throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"78a74ba9-b8eb-9ca5-ab34-5a56f453cf03\",\"output\":{\"task_id\":\"2a1d8589-7148-422a-b9e7-f41682f07160\",\"task_status\":\"PENDING\"}}"; + "{\"request_id\":\"78a74ba9-b8eb-9ca5-ab34-5a56f453cf03\",\"output\":{\"task_id\":\"2a1d8589-7148-422a-b9e7-f41682f07160\",\"task_status\":\"PENDING\"},\"status_code\":200,\"code\":\"\",\"message\":\"\"}"; server.enqueue( new MockResponse() .setBody(responseBody) @@ -101,7 +101,7 @@ public void testFetchTaskWithDiffUrl() throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"78a74ba9-b8eb-9ca5-ab34-5a56f453cf03\",\"output\":{\"task_id\":\"2a1d8589-7148-422a-b9e7-f41682f07160\",\"task_status\":\"PENDING\"}}"; + "{\"request_id\":\"78a74ba9-b8eb-9ca5-ab34-5a56f453cf03\",\"output\":{\"task_id\":\"2a1d8589-7148-422a-b9e7-f41682f07160\",\"task_status\":\"PENDING\"},\"status_code\":200,\"code\":\"\",\"message\":\"\"}"; server.enqueue( new MockResponse() .setBody(responseBody) @@ -132,7 +132,7 @@ public void testListParameters() throws ApiException, NoApiKeyException, Interru .status("SUCCEEDED") .build(); String responseBody = - "{\"request_id\":\"31a80745-990d-958b-ad1c-fd51f17a6996\",\"data\":[{\"api_key_id\":\"1\",\"caller_parent_id\":\"2\",\"caller_uid\":\"3\",\"end_time\":1691561396394,\"gmt_create\":1691561394828,\"model_name\":\"pre-offline-file-embedding\",\"region\":\"cn-beijing\",\"request_id\":\"5ddcdba0-9b22-93c1-946e-1eb152b77efa\",\"start_time\":1691561395295,\"status\":\"SUCCEEDED\",\"task_id\":\"bb7c1bdb-d8de-4619-83b8-9ad3c3313def\",\"user_api_unique_key\":\"apikey:v1:embeddings:text-embedding:text-embedding:pre-offline-file-embedding\"}],\"total\":1,\"total_page\":1,\"page_no\":1,\"page_size\":10}"; + "{\"request_id\":\"31a80745-990d-958b-ad1c-fd51f17a6996\",\"data\":[{\"api_key_id\":\"1\",\"caller_parent_id\":\"2\",\"caller_uid\":\"3\",\"end_time\":1691561396394,\"gmt_create\":1691561394828,\"model_name\":\"pre-offline-file-embedding\",\"region\":\"cn-beijing\",\"request_id\":\"5ddcdba0-9b22-93c1-946e-1eb152b77efa\",\"start_time\":1691561395295,\"status\":\"SUCCEEDED\",\"task_id\":\"bb7c1bdb-d8de-4619-83b8-9ad3c3313def\",\"user_api_unique_key\":\"apikey:v1:embeddings:text-embedding:text-embedding:pre-offline-file-embedding\"}],\"total\":1,\"total_page\":1,\"page_no\":1,\"page_size\":10,\"status_code\":200,\"code\":\"\",\"message\":\"\"}"; server.enqueue( new MockResponse() .setBody(responseBody) diff --git a/src/test/java/com/alibaba/dashscope/TestHalfDuplexHttpApi.java b/src/test/java/com/alibaba/dashscope/TestHalfDuplexHttpApi.java index cd17ffe..d72c97b 100644 --- a/src/test/java/com/alibaba/dashscope/TestHalfDuplexHttpApi.java +++ b/src/test/java/com/alibaba/dashscope/TestHalfDuplexHttpApi.java @@ -78,7 +78,7 @@ public void testHttpSendEmptyResponse() throws ApiException, NoApiKeyException, HalfDuplexTestParam.builder().model("qwen-turbo").parameter("k1", "v1").build(); Constants.baseHttpApiUrl = String.format("http://127.0.0.1:%s", port); DashScopeResult result = syncApi.call(param); - assertEquals(JsonUtils.toJson(result), "{}"); + assertEquals(JsonUtils.toJson(result), "{\"statusCode\":200,\"code\":\"\",\"message\":\"\"}"); server.close(); } diff --git a/src/test/java/com/alibaba/dashscope/TestImageSynthesis.java b/src/test/java/com/alibaba/dashscope/TestImageSynthesis.java index bc5db36..0da603f 100644 --- a/src/test/java/com/alibaba/dashscope/TestImageSynthesis.java +++ b/src/test/java/com/alibaba/dashscope/TestImageSynthesis.java @@ -48,7 +48,7 @@ public void testImageSynthesisNormal() throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"results\":[{\"url\":\"https://1\"},{\"url\":\"https://2\"},{\"url\":\"https://\"},{\"url\":\"https://4\"}],\"task_metrics\":{\"TOTAL\":4,\"SUCCEEDED\":4,\"FAILED\":0}},\"usage\":{\"image_count\":4}}"; + "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"results\":[{\"url\":\"https://1\"},{\"url\":\"https://2\"},{\"url\":\"https://\"},{\"url\":\"https://4\"}],\"task_metrics\":{\"TOTAL\":4,\"SUCCEEDED\":4,\"FAILED\":0}},\"usage\":{\"image_count\":4},\"status_code\":200,\"code\":\"\",\"message\":\"\"}"; server.enqueue( new MockResponse() .setBody(responseBody) diff --git a/src/test/java/com/alibaba/dashscope/TestVideoSynthesis.java b/src/test/java/com/alibaba/dashscope/TestVideoSynthesis.java index 4d92b85..fff9a0a 100644 --- a/src/test/java/com/alibaba/dashscope/TestVideoSynthesis.java +++ b/src/test/java/com/alibaba/dashscope/TestVideoSynthesis.java @@ -45,7 +45,7 @@ public void testVideoSynthesisNormal() throws ApiException, NoApiKeyException, IOException, InterruptedException, InputRequiredException { String responseBody = - "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"video_url\":\"https://1\"},\"usage\":{\"video_count\":1}}"; + "{\"request_id\":\"39\",\"output\":{\"task_id\":\"e4\",\"task_status\":\"SUCCEEDED\",\"video_url\":\"https://1\"},\"usage\":{\"video_count\":1,\"duration\":0.0,\"input_video_duration\":0.0,\"output_video_duration\":0.0},\"status_code\":200,\"code\":\"\",\"message\":\"\"}"; assert MEDIA_TYPE_APPLICATION_JSON != null; server.enqueue( new MockResponse() @@ -70,7 +70,7 @@ public void testVideoSynthesisNormal() String requestBody = request.getBody().readUtf8(); System.out.println(requestBody); String expectRequestBody = - "{\"model\":\"wanx-kf2v\",\"input\":{\"extend_prompt\":true,\"first_frame_url\":\"https://www.xxx.cn/a.png\",\"last_frame_url\":\"https://www.xxx.cn/b.png\"},\"parameters\":{\"duration\":5,\"with_audio\":false,\"size\":\"1280*720\",\"resolution\":\"720P\"}}"; + "{\"model\":\"wanx-kf2v\",\"input\":{\"extend_prompt\":true,\"first_frame_url\":\"https://www.xxx.cn/a.png\",\"last_frame_url\":\"https://www.xxx.cn/b.png\"},\"parameters\":{\"with_audio\":false}}"; Assertions.assertEquals(expectRequestBody, requestBody); } @@ -105,7 +105,7 @@ public void testVideoSynthesisUsageMore() String requestBody = request.getBody().readUtf8(); System.out.println(requestBody); String expectRequestBody = - "{\"model\":\"wanx-kf2v\",\"input\":{\"extend_prompt\":true,\"first_frame_url\":\"https://www.xxx.cn/a.png\",\"last_frame_url\":\"https://www.xxx.cn/b.png\"},\"parameters\":{\"duration\":4,\"with_audio\":false,\"size\":\"1280*720\",\"seed\":1234,\"resolution\":\"720P\"}}"; + "{\"model\":\"wanx-kf2v\",\"input\":{\"extend_prompt\":true,\"first_frame_url\":\"https://www.xxx.cn/a.png\",\"last_frame_url\":\"https://www.xxx.cn/b.png\"},\"parameters\":{\"duration\":4,\"with_audio\":false,\"seed\":1234}}"; Assertions.assertEquals(expectRequestBody, requestBody); } } From 1d995773b02bf21137361c81dd749c5af526f454 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Thu, 22 Jan 2026 17:49:42 +0800 Subject: [PATCH 50/64] feat(ci): remove reduntant checkstyle job --- .github/workflows/build-and-test.yml | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b4a6d79..e45bec8 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -37,31 +37,8 @@ permissions: contents: read jobs: - checkstyle: - runs-on: ubuntu-22.04 - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up JDK ${{ env.JAVA_VERSION }} - uses: actions/setup-java@v4 - with: - java-version: ${{ env.JAVA_VERSION }} - distribution: ${{ env.JAVA_DISTRIBUTION }} - cache: 'maven' - - - name: Run checkstyle - run: | - if [ -f lint.sh ]; then - chmod +x lint.sh - ./lint.sh - else - echo "No lint.sh found, skipping checkstyle" - fi - test: runs-on: ubuntu-22.04 - needs: [checkstyle] steps: - name: Checkout code uses: actions/checkout@v4 @@ -87,7 +64,7 @@ jobs: build: runs-on: ubuntu-22.04 - needs: [checkstyle, test] + needs: [test] steps: - name: Checkout code uses: actions/checkout@v4 From 18829315839006c7f2677c79e07b1f02dd96cbc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Tue, 27 Jan 2026 15:23:52 +0800 Subject: [PATCH 51/64] feat(model/qwen3-tts-realtime):support instruct param --- .../multimodalconversation/AudioParameters.java | 4 ++-- .../qwen_tts_realtime/QwenTtsRealtimeConfig.java | 14 ++++++++++++++ .../QwenTtsRealtimeConstants.java | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java index d77aaed..a468f6a 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/AudioParameters.java @@ -113,8 +113,8 @@ public enum Voice { @SerializedName("Aiden") AIDEN("Aiden"), - @SerializedName("Eldric Saga") - ELDRIC_SAGA("Eldric Saga"), + @SerializedName("Eldric Sage") + ELDRIC_SAGE("Eldric Sage"), @SerializedName("Mia") MIA("Mia"), diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java index 4ab517b..cf7e187 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java @@ -47,6 +47,12 @@ public class QwenTtsRealtimeConfig { /** text normalization, default is true */ @Builder.Default Boolean enableTn = true; + /** instructions for tts, default is null */ + @Builder.Default String instructions = null; + + /** instructions will optimize on server side, default is null */ + @Builder.Default String optimizeInstructions = null; + /** The extra parameters. */ @Builder.Default Map parameters = null; @@ -84,6 +90,14 @@ public JsonObject getConfig() { config.put(QwenTtsRealtimeConstants.ENABLE_TN, enableTn); } + if (instructions != null) { + config.put(QwenTtsRealtimeConstants.INSTRUCTIONS, instructions); + } + + if (optimizeInstructions != null) { + config.put(QwenTtsRealtimeConstants.OPTIMIZE_INSTRUCTIONS, optimizeInstructions); + } + if (parameters != null) { for (Map.Entry entry : parameters.entrySet()) { config.put(entry.getKey(), entry.getValue()); diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java index f6eb119..7b19f9d 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java @@ -15,6 +15,8 @@ public class QwenTtsRealtimeConstants { public static final String BIT_RATE = "bit_rate"; public static final String LANGUAGE_TYPE = "language_type"; public static final String ENABLE_TN = "enable_tn"; + public static final String INSTRUCTIONS = "instructions"; + public static final String OPTIMIZE_INSTRUCTIONS = "optimize_instructions"; public static final String PROTOCOL_EVENT_ID = "event_id"; public static final String PROTOCOL_TYPE = "type"; public static final String PROTOCOL_SESSION = "session"; From 6c1551c7ff132cde1c6af49311f5b7ec42243519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Thu, 29 Jan 2026 10:37:47 +0800 Subject: [PATCH 52/64] feat(model/qwen3-tts-realtime):optimize_instructions params --- .../audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java index cf7e187..8de632d 100644 --- a/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java +++ b/src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java @@ -50,8 +50,8 @@ public class QwenTtsRealtimeConfig { /** instructions for tts, default is null */ @Builder.Default String instructions = null; - /** instructions will optimize on server side, default is null */ - @Builder.Default String optimizeInstructions = null; + /** instructions will optimize on server side, default is false */ + @Builder.Default Boolean optimizeInstructions = false; /** The extra parameters. */ @Builder.Default Map parameters = null; @@ -94,8 +94,8 @@ public JsonObject getConfig() { config.put(QwenTtsRealtimeConstants.INSTRUCTIONS, instructions); } - if (optimizeInstructions != null) { - config.put(QwenTtsRealtimeConstants.OPTIMIZE_INSTRUCTIONS, optimizeInstructions); + if (optimizeInstructions != null && optimizeInstructions) { + config.put(QwenTtsRealtimeConstants.OPTIMIZE_INSTRUCTIONS, true); } if (parameters != null) { From e06dd85841fb3466891f7cebdd8ae19cb57a3ad9 Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Tue, 3 Feb 2026 10:48:57 +0800 Subject: [PATCH 53/64] release version 2.22.7 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index acb62eb..b45f671 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.6 + 2.22.7 8 From 664966cc5c8ef746e8a045c2e996d848dceafb10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=A5=E4=BB=99?= Date: Tue, 27 Jan 2026 14:23:57 +0800 Subject: [PATCH 54/64] (feat:model/cosyvoice):use new protocol to build websocket connection --- .../protocol/AudioWebsocketCallback.java | 18 + .../audio/protocol/AudioWebsocketRequest.java | 162 ++++++ .../audio/ttsv2/SpeechSynthesizerV2.java | 544 ++++++++++++++++++ .../TestTtsV2SpeechSynthesizerV2.java | 160 ++++++ 4 files changed, 884 insertions(+) create mode 100644 src/main/java/com/alibaba/dashscope/audio/protocol/AudioWebsocketCallback.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/protocol/AudioWebsocketRequest.java create mode 100644 src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesizerV2.java create mode 100644 src/test/java/com/alibaba/dashscope/TestTtsV2SpeechSynthesizerV2.java diff --git a/src/main/java/com/alibaba/dashscope/audio/protocol/AudioWebsocketCallback.java b/src/main/java/com/alibaba/dashscope/audio/protocol/AudioWebsocketCallback.java new file mode 100644 index 0000000..93dcce2 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/protocol/AudioWebsocketCallback.java @@ -0,0 +1,18 @@ +package com.alibaba.dashscope.audio.protocol; + +import java.nio.ByteBuffer; +import okhttp3.WebSocket; + +/** @author songsong.shao */ +public interface AudioWebsocketCallback { + + void onOpen(); + + void onMessage(WebSocket webSocket, String text); + + void onMessage(WebSocket webSocket, ByteBuffer buffer); + + void onError(WebSocket webSocket, Throwable t); + + void onClose(int code, String reason); +} diff --git a/src/main/java/com/alibaba/dashscope/audio/protocol/AudioWebsocketRequest.java b/src/main/java/com/alibaba/dashscope/audio/protocol/AudioWebsocketRequest.java new file mode 100644 index 0000000..9cea829 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/protocol/AudioWebsocketRequest.java @@ -0,0 +1,162 @@ +package com.alibaba.dashscope.audio.protocol; + +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.alibaba.dashscope.protocol.DashScopeHeaders; +import com.alibaba.dashscope.protocol.okhttp.OkHttpClientFactory; +import com.alibaba.dashscope.utils.ApiKey; +import com.alibaba.dashscope.utils.Constants; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; +import okhttp3.*; +import okio.ByteString; + +/** @author songsong.shao */ +@Slf4j +public class AudioWebsocketRequest extends WebSocketListener { + + private OkHttpClient client; + private WebSocket websocketClient; + private AtomicBoolean isOpen = new AtomicBoolean(false); + private AtomicReference connectLatch = new AtomicReference<>(null); + private AtomicBoolean isClosed = new AtomicBoolean(false); + private AudioWebsocketCallback callback; + private Integer connectTimeout = 5000; + + public boolean isOpen() { + return isOpen.get(); + } + + public boolean isClosed() { + return isClosed.get(); + } + + public void checkStatus() { + if (this.isClosed.get()) { + throw new RuntimeException("Websocket is already closed!"); + } + } + + public void connect( + String apiKey, + String workspace, + Map customHeaders, + String baseWebSocketUrl, + AudioWebsocketCallback callback) + throws NoApiKeyException, InterruptedException, RuntimeException { + Request request = + buildConnectionRequest( + ApiKey.getApiKey(apiKey), false, workspace, customHeaders, baseWebSocketUrl); + this.callback = callback; + client = OkHttpClientFactory.getOkHttpClient(); + websocketClient = client.newWebSocket(request, this); + connectLatch.set(new CountDownLatch(1)); + boolean result = connectLatch.get().await(connectTimeout, TimeUnit.MILLISECONDS); + if (!result) { + throw new RuntimeException( + "TimeoutError: waiting for websocket connect more than" + connectTimeout + " ms."); + } + } + + private Request buildConnectionRequest( + String apiKey, + boolean isSecurityCheck, + String workspace, + Map customHeaders, + String baseWebSocketUrl) + throws NoApiKeyException { + // build the request builder. + Request.Builder bd = new Request.Builder(); + bd.headers( + Headers.of( + DashScopeHeaders.buildWebSocketHeaders( + apiKey, isSecurityCheck, workspace, customHeaders))); + String url = Constants.baseWebsocketApiUrl; + if (baseWebSocketUrl != null) { + url = baseWebSocketUrl; + } + Request request = bd.url(url).build(); + return request; + } + + private void sendMessage(String message, boolean enableLog) { + checkStatus(); + if (enableLog) { + log.debug("send message: " + message); + } + if (!websocketClient.send(message)) { + log.warn("Failed to enqueue websocket text message for sending."); + } + } + + public void close() { + this.close(1000, "bye"); + } + + public void close(int code, String reason) { + checkStatus(); + websocketClient.close(code, reason); + isClosed.set(true); + } + + public void sendTextMessage(String message) { + checkStatus(); + this.sendMessage(message, true); + } + + public void sendBinaryMessage(ByteString rawData) { + checkStatus(); + if (!websocketClient.send(rawData)) { + log.warn("Failed to enqueue websocket binary message for sending."); + } + } + + @Override + public void onOpen(WebSocket webSocket, Response response) { + isOpen.set(true); + if (connectLatch.get() != null) { + connectLatch.get().countDown(); + } + + log.debug("WebSocket opened"); + callback.onOpen(); + } + + @Override + public void onMessage(WebSocket webSocket, String text) { + callback.onMessage(webSocket, text); + } + + @Override + public void onMessage(WebSocket webSocket, ByteString bytes) { + log.debug("Received binary message"); + callback.onMessage(webSocket, bytes.asByteBuffer()); + } + + @Override + public void onClosed(WebSocket webSocket, int code, String reason) { + isOpen.set(false); + isClosed.set(true); + if (connectLatch.get() != null) { + connectLatch.get().countDown(); + } + log.debug("WebSocket closed"); + callback.onClose(code, reason); + } + + @Override + public void onFailure(WebSocket webSocket, Throwable t, Response response) { + log.error("WebSocket failed: " + t); + if (connectLatch.get() != null) { + connectLatch.get().countDown(); + } + if (callback != null) { + callback.onError(webSocket, t); + } else { + throw new RuntimeException(t); + } + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesizerV2.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesizerV2.java new file mode 100644 index 0000000..94acb33 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesizerV2.java @@ -0,0 +1,544 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope.audio.ttsv2; + +import com.alibaba.dashscope.audio.protocol.AudioWebsocketCallback; +import com.alibaba.dashscope.audio.protocol.AudioWebsocketRequest; +import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult; +import com.alibaba.dashscope.audio.tts.SpeechSynthesisUsage; +import com.alibaba.dashscope.audio.tts.timestamp.Sentence; +import com.alibaba.dashscope.common.*; +import com.alibaba.dashscope.exception.ApiException; +import com.alibaba.dashscope.exception.InputRequiredException; +import com.alibaba.dashscope.exception.NoApiKeyException; +import com.alibaba.dashscope.protocol.*; +import com.alibaba.dashscope.utils.Constants; +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; +import okhttp3.WebSocket; + +/** @author songsong.sss */ +@Slf4j +public final class SpeechSynthesizerV2 implements AudioWebsocketCallback { + private SpeechSynthesisState state = SpeechSynthesisState.IDLE; + private ResultCallback callback; + + private AtomicReference stopLatch = new AtomicReference<>(null); + + private SpeechSynthesisParam parameters; + + private String preRequestId = null; + private boolean isFirst = true; + private AtomicBoolean canceled = new AtomicBoolean(false); + private boolean asyncCall = false; + private ByteArrayOutputStream audioStream; + private long startStreamTimeStamp = -1; + private long firstPackageTimeStamp = -1; + private double recvAudioLength = 0; + @Getter @Setter private long startedTimeout = 5000; + @Getter @Setter private long firstAudioTimeout = -1; + private AtomicReference startLatch = new AtomicReference<>(null); + private AudioWebsocketRequest websocketRequest; + private String websocketUrl = Constants.baseWebsocketApiUrl; + private JsonObject bailianHeader = new JsonObject(); + private static final String HEADER_ACTION = "action"; + private static final String TASK_ID = "task_id"; + private static final Integer DEFAULT_COMPLETE_TIMEOUT = 60 * 1000; + private String taskId; + private boolean enableSsml = false; + + /** + * CosyVoice Speech Synthesis SDK + * + * @param param Configuration for speech synthesis, including voice type, volume, etc. + * @param callback In non-streaming output scenarios, this can be set to null + * @param baseUrl Base URL + * @param connectionOptions Connection options + */ + public SpeechSynthesizerV2( + SpeechSynthesisParam param, + ResultCallback callback, + String baseUrl, + ConnectionOptions connectionOptions) { + if (baseUrl != null) { + this.websocketUrl = baseUrl; + } + this.parameters = param; + this.callback = callback; + this.asyncCall = this.callback != null; + this.taskId = UUID.randomUUID().toString(); + } + + /** + * CosyVoice Speech Synthesis SDK + * + * @param baseUrl Base URL + * @param connectionOptions Connection options + */ + public SpeechSynthesizerV2(String baseUrl, ConnectionOptions connectionOptions) { + this(null, null, baseUrl, connectionOptions); + } + + /** CosyVoice Speech Synthesis SDK */ + public SpeechSynthesizerV2() { + this(null, null, null, null); + } + + public void updateParamAndCallback( + SpeechSynthesisParam param, ResultCallback callback) { + this.parameters = param; + this.callback = callback; + this.canceled.set(false); + + // reset inner params + this.stopLatch = new AtomicReference<>(null); + this.startLatch = new AtomicReference<>(null); + this.firstAudioTimeout = -1; + this.isFirst = true; + this.audioStream = new ByteArrayOutputStream(); + + this.asyncCall = this.callback != null; + this.taskId = UUID.randomUUID().toString(); + } + + /** + * CosyVoice Speech Synthesis SDK + * + * @param param Configuration for speech synthesis, including voice type, volume, etc. + * @param callback In non-streaming output scenarios, this can be set to null + * @param baseUrl Base URL + */ + public SpeechSynthesizerV2( + SpeechSynthesisParam param, ResultCallback callback, String baseUrl) { + this(param, callback, baseUrl, null); + } + + /** + * CosyVoice Speech Synthesis SDK + * + * @param param Configuration for speech synthesis, including voice type, volume, etc. + * @param callback In non-streaming output scenarios, this can be set to null + */ + public SpeechSynthesizerV2( + SpeechSynthesisParam param, ResultCallback callback) { + this(param, callback, null, null); + } + + public String getLastRequestId() { + return preRequestId; + } + + private void checkConnectStatus() { + websocketRequest.checkStatus(); + } + + public void connect() throws NoApiKeyException, InterruptedException { + startStreamTimeStamp = System.currentTimeMillis(); + this.audioStream = new ByteArrayOutputStream(); + this.canceled.set(false); + if (websocketRequest != null && websocketRequest.isOpen()) { + websocketRequest.close(); + } + + websocketRequest = new AudioWebsocketRequest(); + websocketRequest.connect( + parameters.getApiKey(), + parameters.getWorkspace(), + parameters.getHeaders(), + websocketUrl, + this); + } + + public void close() { + if (websocketRequest != null && websocketRequest.isOpen()) { + try { + websocketRequest.close(); + } catch (Exception e) { + log.warn("Failed to close websocket connection: " + e.getMessage()); + } + } + } + + private synchronized void sendTaskMessage(String action, JsonObject input) { + JsonObject wsMessage = new JsonObject(); + + bailianHeader.addProperty(HEADER_ACTION, action); + bailianHeader.addProperty(TASK_ID, taskId); + + JsonObject payload = new JsonObject(); + if ("run-task".equals(action)) { + payload.addProperty("task_group", "audio"); + payload.addProperty("task", "tts"); + payload.addProperty("function", "SpeechSynthesizer"); + payload.addProperty("model", this.parameters.getModel()); + JsonObject parameters = JsonUtils.toJsonObject(this.parameters.getParameters()); + if (enableSsml) { + parameters.addProperty("enable_ssml", true); + } + payload.add("parameters", parameters); + + payload.add("input", input != null ? input : new JsonObject()); + } else { + payload.add("input", input != null ? input : new JsonObject()); + } + + wsMessage.add("header", JsonUtils.toJsonObject(bailianHeader)); + wsMessage.add("payload", JsonUtils.toJsonObject(payload)); + log.debug("sendTaskMessage: {}", wsMessage.toString()); + websocketRequest.sendTextMessage(wsMessage.toString()); + } + + public void startSynthesizer(boolean enableSsml) throws InterruptedException { + bailianHeader.addProperty("streaming", "duplex"); + this.enableSsml = enableSsml; + sendTaskMessage("run-task", new JsonObject()); + } + + public void sendText(String text) { + JsonObject input = new JsonObject(); + input.addProperty("text", text); + sendTaskMessage("continue-task", input); + } + + public void stopSynthesizer() { + sendTaskMessage("finish-task", new JsonObject()); + } + + @Override + public void onOpen() { + log.info("WebSocket connection opened"); + if (callback != null) { + callback.onOpen(null); + } + } + + @Override + public void onMessage(WebSocket webSocket, String text) { + log.debug("Received text message: " + text); + try { + JsonObject messageObj = JsonParser.parseString(text).getAsJsonObject(); + if (messageObj.has("header")) { + JsonObject header = messageObj.getAsJsonObject("header"); + if (header.has("event")) { + String event = header.get("event").getAsString(); + + switch (event) { + case "task-started": + handleTaskStarted(messageObj); + break; + case "task-finished": + handleTaskFinished(messageObj); + break; + case "task-failed": + handleTaskFailed(messageObj); + break; + case "result-generated": + handleResultGenerated(messageObj); + break; + default: + log.warn("Unknown event: " + event); + break; + } + } + } + } catch (Exception e) { + log.error("Error processing text message: " + e.getMessage(), e); + } + } + + @Override + public void onMessage(WebSocket webSocket, ByteBuffer bytes) { + log.debug("Received binary message, size: {}", bytes.remaining()); + try { + ByteBuffer audioFrame = ByteBuffer.allocate(bytes.remaining()); + audioFrame.put(bytes); + audioFrame.flip(); + + if (callback != null) { + SpeechSynthesisResult result = new SpeechSynthesisResult(); + result.setAudioFrame(audioFrame); + callback.onEvent(result); + } else { + // Use atomic reference compare-and-swap for thread-safe accumulation + accumulateAudioData(audioFrame); + } + + // Update received audio length + recvAudioLength += bytes.remaining(); + + } catch (Exception e) { + log.error("Error processing binary message", e); + if (callback != null) { + callback.onError(e); + } + } + } + + /** + * Accumulates audio data to audioStream. Reuses existing buffer when possible to minimize + * allocations. + */ + private void accumulateAudioData(ByteBuffer frame) throws Exception { + if (audioStream == null) { + audioStream = new ByteArrayOutputStream(); + } + byte[] buffer = new byte[frame.remaining()]; + frame.get(buffer); + audioStream.write(buffer, 0, buffer.length); + } + + @Override + public void onError(WebSocket webSocket, Throwable t) { + if (callback != null) { + // callback error first + callback.onError(new ApiException(t)); + } + + CountDownLatch startLatch = this.startLatch.get(); + if (startLatch != null && startLatch.getCount() > 0) { + startLatch.countDown(); + } + + CountDownLatch stopLatch = this.stopLatch.get(); + if (stopLatch != null && stopLatch.getCount() > 0) { + stopLatch.countDown(); + } + + if (audioStream != null) { + audioStream.reset(); + } + } + + @Override + public void onClose(int code, String reason) { + log.warn("WebSocket connection closed: " + reason + " (" + code + ")"); + } + + private void handleTaskStarted(JsonObject message) { + log.info("Task started"); + state = SpeechSynthesisState.TTS_STARTED; + firstPackageTimeStamp = -1; + if (startLatch.get() != null) { + startLatch.get().countDown(); + } + } + + private void handleTaskFinished(JsonObject message) { + log.info("Task finished"); + if (stopLatch.get() != null) { + stopLatch.get().countDown(); + } + if (callback != null) { + callback.onComplete(); + } + if (audioStream != null) { + audioStream.reset(); // 重置 ByteArrayOutputStream,清空数据但保留缓冲区 + } + // Reset for reuse + isFirst = true; + } + + private void handleTaskFailed(JsonObject message) { + log.error("Task failed: " + message.toString()); + if (callback != null) { + String errorMessage = "Unknown error"; + if (message.has("header") && message.getAsJsonObject("header").has("error_message")) { + errorMessage = message.getAsJsonObject("header").get("error_message").getAsString(); + } + + // Create a Status object for the ApiException + com.alibaba.dashscope.common.Status status = + com.alibaba.dashscope.common.Status.builder() + .statusCode(-1) + .code("TASK_FAILED") + .message(errorMessage) + .build(); + callback.onError(new ApiException(status)); + } + if (stopLatch.get() != null) { + stopLatch.get().countDown(); + } + } + + private void handleResultGenerated(JsonObject message) { + log.debug("Result generated: " + message.toString()); + if (callback == null) { + return; + } + SpeechSynthesisResult result = new SpeechSynthesisResult(); + if (message.has("header")) { + JsonObject header = message.getAsJsonObject("header"); + if (header.has("task_id")) { + preRequestId = header.get("task_id").getAsString(); + result.setRequestId(preRequestId); + } + } + if (message.has("payload")) { + JsonObject payload = message.getAsJsonObject("payload"); + if (payload != null && payload.has("output")) { + JsonObject output = payload.getAsJsonObject("output"); + result.setOutput(output); + if (output != null && output.has("sentence")) { + result.setTimestamp( + JsonUtils.fromJsonObject(output.getAsJsonObject("sentence"), Sentence.class)); + } + } + if (payload != null && payload.has("usage")) { + result.setUsage( + JsonUtils.fromJsonObject(payload.getAsJsonObject("usage"), SpeechSynthesisUsage.class)); + } + } + callback.onEvent(result); + } + + /** First Package Delay is the time between start sending text and receive first audio package */ + public long getFirstPackageDelay() { + return this.firstPackageTimeStamp - this.startStreamTimeStamp; + } + + private void startStream(boolean enableSsml) throws NoApiKeyException, InterruptedException { + if (websocketRequest == null || !websocketRequest.isOpen()) { + // if websocket is not open, then connect + connect(); + } else { + startStreamTimeStamp = System.currentTimeMillis(); + } + + checkConnectStatus(); // check websocket connection, if socket is closed. + startLatch = new AtomicReference<>(new CountDownLatch(1)); + startSynthesizer(enableSsml); + boolean startResult = startLatch.get().await(startedTimeout, TimeUnit.MILLISECONDS); + if (!startResult) { + throw new RuntimeException( + "TimeoutError: waiting for task started more than " + startedTimeout + " ms."); + } + } + + private void submitText(String text) { + if (text == null || text.isEmpty()) { + throw new ApiException( + new InputRequiredException("Parameter invalid: text is null or empty")); + } + synchronized (this) { + if (state != SpeechSynthesisState.TTS_STARTED) { + throw new ApiException( + new InputRequiredException( + "State invalid: expect stream input tts state is started but " + state.getValue())); + } + sendText(text); + } + } + + private void startStream() throws NoApiKeyException, InterruptedException { + startStream(false); + } + + public void streamingComplete(long completeTimeoutMillis) { + log.debug("streamingComplete with timeout: " + completeTimeoutMillis); + synchronized (this) { + if (state != SpeechSynthesisState.TTS_STARTED) { + throw new ApiException( + new RuntimeException( + "State invalid: expect stream input tts state is started but " + state.getValue())); + } + } + stopLatch = new AtomicReference<>(new CountDownLatch(1)); + stopSynthesizer(); + + if (stopLatch.get() != null) { + try { + if (completeTimeoutMillis > 0) { + log.debug("start waiting for stopLatch"); + if (!stopLatch.get().await(completeTimeoutMillis, TimeUnit.MILLISECONDS)) { + throw new RuntimeException("TimeoutError: waiting for streaming complete"); + } + } else { + log.debug("start waiting for stopLatch"); + stopLatch.get().await(); + } + log.debug("stopLatch is done"); + } catch (InterruptedException ignored) { + log.error("Interrupted while waiting for streaming complete"); + Thread.currentThread().interrupt(); + } + } + } + + public void streamingComplete() { + streamingComplete(DEFAULT_COMPLETE_TIMEOUT); + } + + public void asyncStreamingComplete() { + synchronized (this) { + if (state != SpeechSynthesisState.TTS_STARTED) { + throw new ApiException( + new RuntimeException( + "State invalid: expect stream input tts state is started but " + state.getValue())); + } + stopSynthesizer(); + } + } + + public void streamingCancel() { + canceled.set(true); + synchronized (this) { + if (state != SpeechSynthesisState.TTS_STARTED) { + return; + } + stopSynthesizer(); + } + } + + public void streamingCall(String text) { + if (isFirst) { + isFirst = false; + try { + this.startStream(false); + this.submitText(text); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); // 恢复中断状态 + log.error("Interrupted while waiting for streaming complete", e); + throw new ApiException(e); + } catch (NoApiKeyException e) { + throw new ApiException(e); + } + } else { + this.submitText(text); + } + } + + public ByteBuffer call(String text, long timeoutMillis) throws RuntimeException { + try { + this.startStream(true); + this.submitText(text); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); // 恢复中断状态 + log.error("Interrupted while waiting for streaming complete", e); + throw new ApiException(e); + } catch (NoApiKeyException e) { + throw new ApiException(e); + } + if (this.asyncCall) { + this.asyncStreamingComplete(); + return null; + } else { + this.streamingComplete(timeoutMillis); + return ByteBuffer.wrap(audioStream.toByteArray()); + } + } + + public ByteBuffer call(String text) { + return call(text, 0); + } +} diff --git a/src/test/java/com/alibaba/dashscope/TestTtsV2SpeechSynthesizerV2.java b/src/test/java/com/alibaba/dashscope/TestTtsV2SpeechSynthesizerV2.java new file mode 100644 index 0000000..7e677a4 --- /dev/null +++ b/src/test/java/com/alibaba/dashscope/TestTtsV2SpeechSynthesizerV2.java @@ -0,0 +1,160 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope; + +import static org.junit.Assert.assertEquals; + +import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisAudioFormat; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; +import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizerV2; +import com.alibaba.dashscope.common.ResultCallback; +import com.alibaba.dashscope.utils.Constants; +import com.alibaba.dashscope.utils.JsonUtils; +import com.google.gson.JsonObject; +import java.io.IOException; +import java.util.ArrayList; +import lombok.extern.slf4j.Slf4j; +import okhttp3.Response; +import okhttp3.WebSocket; +import okhttp3.WebSocketListener; +import okhttp3.mockwebserver.MockResponse; +import okhttp3.mockwebserver.MockWebServer; +import okio.ByteString; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +@Execution(ExecutionMode.SAME_THREAD) +@Slf4j +public class TestTtsV2SpeechSynthesizerV2 { + private static ArrayList audioBuffer; + private static ResultCallback callback = + new ResultCallback() { + @Override + public void onEvent(SpeechSynthesisResult message) { + System.out.println("onEvent:" + message); + if (message.getAudioFrame() != null) { + for (byte b : message.getAudioFrame().array()) { + audioBuffer.add(b); + } + } + } + + @Override + public void onComplete() { + // System.out.println("onComplete"); + } + + @Override + public void onError(Exception e) {} + }; + private static MockWebServer mockServer; + + @BeforeAll + public static void before() throws IOException { + audioBuffer = new ArrayList<>(); + mockServer = new MockWebServer(); + mockServer.start(); + MockResponse response = + new MockResponse() + .withWebSocketUpgrade( + new WebSocketListener() { + String task_id = ""; + + @Override + public void onOpen(WebSocket webSocket, Response response) { + System.out.println("Mock Server onOpen"); + System.out.println( + "Mock Server request header:" + response.request().headers()); + System.out.println("Mock Server response header:" + response.headers()); + System.out.println("Mock Server response:" + response); + } + + @Override + public void onMessage(WebSocket webSocket, String string) { + System.out.println("mock server recv: " + string); + JsonObject req = JsonUtils.parse(string); + if (task_id == "") { + task_id = req.get("header").getAsJsonObject().get("task_id").getAsString(); + } + if (string.contains("run-task")) { + try { + Thread.sleep(100); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + webSocket.send( + "{'header': {'task_id': '" + + task_id + + "', 'event': 'task-started', 'attributes': {}}, 'payload': {}}"); + } else if (string.contains("finish-task")) { + try { + Thread.sleep(100); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + webSocket.send( + "{'header': {'task_id': '" + + task_id + + "', 'event': 'task-finished', 'attributes': {}}, 'payload': {'output': None, 'usage': {'characters': 7}}}"); + webSocket.close(1000, "close by server"); + } else if (string.contains("continue-task")) { + try { + Thread.sleep(100); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + byte[] binary = new byte[] {0x01, 0x01, 0x01}; + webSocket.send(new ByteString(binary)); + } + } + }); + mockServer.enqueue(response); + } + + @AfterAll + public static void after() throws IOException { + System.out.println("Mock Server is closed"); + mockServer.close(); + } + + @Test + public void testStreamingCall() { + System.out.println("############ Start Test Streaming Call ############"); + int port = mockServer.getPort(); + Constants.baseWebsocketApiUrl = String.format("http://127.0.0.1:%s", port); + + // 获取 URL + String url = mockServer.url("/binary").toString(); + + // 在真实世界中,你会在这里做 HTTP 请求,并得到响应 + System.out.println("Mock Server is running at: " + url); + SpeechSynthesisParam param = + SpeechSynthesisParam.builder() + .apiKey("1234") + .model("cosyvoice-v1") + .voice("longxiaochun") + .format(SpeechSynthesisAudioFormat.MP3_16000HZ_MONO_128KBPS) + .build(); + SpeechSynthesizerV2 synthesizer = new SpeechSynthesizerV2(param, callback); + synthesizer.setStartedTimeout(1000); + synthesizer.setFirstAudioTimeout(2000); + for (int i = 0; i < 3; i++) { + synthesizer.streamingCall("今天天气怎么样?"); + } + try { + synthesizer.streamingComplete(); + synthesizer.close(); + } catch (Exception e) { + throw new RuntimeException(e); + } + assertEquals(audioBuffer.size(), 9); + for (int i = 0; i < 9; i++) { + assertEquals((byte) audioBuffer.get(i), (byte) 0x01); + } + System.out.println("############ Start Test Streaming Call Done ############"); + } +} From 77bc7d0d68c2f304ef03f710d84b6ed20dc6710d Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Thu, 5 Feb 2026 13:51:47 +0800 Subject: [PATCH 55/64] release version 2.22.8 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b45f671..320ff31 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.7 + 2.22.8 8 From 98185d619e8ce66f882c5f30b9fd552a30d373cc Mon Sep 17 00:00:00 2001 From: songguocola Date: Mon, 9 Feb 2026 14:10:47 +0800 Subject: [PATCH 56/64] feat(model/cosyvoice): support hot_fix params and max_prompt_audio_length (#191) --- .../MultiModalConversationParam.java | 4 ++ .../audio/tts/SpeechSynthesisApiKeywords.java | 8 +++ .../dashscope/audio/ttsv2/ParamHotFix.java | 62 +++++++++++++++++++ .../audio/ttsv2/SpeechSynthesisParam.java | 16 +++++ .../enrollment/VoiceEnrollmentParam.java | 5 ++ .../enrollment/VoiceEnrollmentService.java | 1 + .../dashscope/TestTtsV2SpeechSynthesizer.java | 12 ++++ 7 files changed, 108 insertions(+) create mode 100644 src/main/java/com/alibaba/dashscope/audio/ttsv2/ParamHotFix.java diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationParam.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationParam.java index 2e69b77..1e9cdfb 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationParam.java @@ -182,6 +182,10 @@ public JsonObject getInput() { jsonObject.addProperty(ApiKeywords.VOICE, voice.getValue()); } + if (parameters != null && !parameters.isEmpty() && parameters.containsKey(ApiKeywords.VOICE)) { + jsonObject.addProperty(ApiKeywords.VOICE, (String) parameters.get(ApiKeywords.VOICE)); + } + if (languageType != null) { jsonObject.addProperty(ApiKeywords.LANGUAGE_TYPE, languageType); } diff --git a/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisApiKeywords.java b/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisApiKeywords.java index d943bbe..6d792fc 100644 --- a/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisApiKeywords.java @@ -6,7 +6,9 @@ public class SpeechSynthesisApiKeywords { public static final String TEXT_TYPE = "text_type"; public static final String FORMAT = "format"; + public static final String BIT_RATE = "bit_rate"; + public static final String VOICE = "voice"; public static final String SAMPLE_RATE = "sample_rate"; @@ -21,6 +23,12 @@ public class SpeechSynthesisApiKeywords { public static final String PHONEME_TIMESTAMP = "phoneme_timestamp_enabled"; + public static final String HOT_FIX = "hot_fix"; + + public static final String PRONUNCIATION = "pronunciation"; + + public static final String REPLACE = "replace"; + public static final String SENTENCE = "sentence"; public static final String WORDS = "words"; diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/ParamHotFix.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/ParamHotFix.java new file mode 100644 index 0000000..6a34730 --- /dev/null +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/ParamHotFix.java @@ -0,0 +1,62 @@ +// Copyright (c) Alibaba, Inc. and its affiliates. + +package com.alibaba.dashscope.audio.ttsv2; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.Data; + +/** Hot fix configuration for speech synthesis, including pronunciation and replace rules. */ +@Data +public class ParamHotFix { + + /** Pronunciation rules to customize specific words. */ + private List pronunciation; + + /** Replace rules to replace specific words with others. */ + private List replace; + + public ArrayList getPronunciation() { + if (pronunciation == null || pronunciation.isEmpty()) { + return null; + } + ArrayList pronunciationList = new ArrayList<>(); + for (PronunciationItem item : pronunciation) { + HashMap pronunciationItem = new HashMap<>(); + pronunciationItem.put(item.getText(), item.getPinyin()); + pronunciationList.add(pronunciationItem); + } + + return pronunciationList; + } + + public ArrayList getReplace() { + if (replace == null || replace.isEmpty()) { + return null; + } + ArrayList replaceList = new ArrayList<>(); + for (ReplaceItem item : replace) { + HashMap replaceItem = new HashMap<>(); + replaceItem.put(item.getText(), item.getReplacement()); + replaceList.add(replaceItem); + } + + return replaceList; + } + + @Data + @AllArgsConstructor + public static class PronunciationItem { + private String text; + private String pinyin; + } + + @Data + @AllArgsConstructor + public static class ReplaceItem { + private String text; + private String replacement; + } +} diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java index f1c535d..1a9c726 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java @@ -55,6 +55,8 @@ public class SpeechSynthesisParam extends FullDuplexServiceParam { @Builder.Default private List languageHints = null; /** synthesis style */ @Builder.Default private int style = 0; + /** Hot fix configuration for pronunciation and replace rules. */ + @Builder.Default private ParamHotFix hotFix = null; @Override public Map getParameters() { @@ -83,6 +85,20 @@ public Map getParameters() { if (getStyle() != 0) { params.put(SpeechSynthesisApiKeywords.STYLE, getStyle()); } + // Add hot fix parameters if present + if (getHotFix() != null) { + Map hotFixParams = new HashMap<>(); + if (getHotFix().getPronunciation() != null && !getHotFix().getPronunciation().isEmpty()) { + hotFixParams.put(SpeechSynthesisApiKeywords.PRONUNCIATION, getHotFix().getPronunciation()); + } + if (getHotFix().getReplace() != null && !getHotFix().getReplace().isEmpty()) { + hotFixParams.put(SpeechSynthesisApiKeywords.REPLACE, getHotFix().getReplace()); + } + if (!hotFixParams.isEmpty()) { + params.put(SpeechSynthesisApiKeywords.HOT_FIX, hotFixParams); + } + } + params.putAll(parameters); return params; } diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java index a69ab70..fa3febb 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java @@ -25,6 +25,8 @@ public class VoiceEnrollmentParam extends HalfDuplexServiceParam { private int pageIndex; private int pageSize; + /** Maximum length of prompt audio in seconds. */ + private float maxPromptAudioLength = 10.0f; protected VoiceEnrollmentParam(HalfDuplexServiceParamBuilder b) { super(b); @@ -50,6 +52,9 @@ public JsonObject getInput() { if (languageHints != null) { input.add("language_hints", JsonUtils.toJsonArray(languageHints)); } + if (maxPromptAudioLength > 0) { + input.addProperty("max_prompt_audio_length", maxPromptAudioLength); + } break; case LIST: input.addProperty(ApiKeywords.ACTION, operationType.getValue()); diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java index df3246c..23efe5d 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentService.java @@ -144,6 +144,7 @@ public Voice createVoice( .languageHints(customParam.getLanguageHints()) .headers(customParam.getHeaders()) .resources(customParam.getResources()) + .maxPromptAudioLength(customParam.getMaxPromptAudioLength()) .parameters(customParam.getParameters()) .workspace(customParam.getWorkspace()) .build(); diff --git a/src/test/java/com/alibaba/dashscope/TestTtsV2SpeechSynthesizer.java b/src/test/java/com/alibaba/dashscope/TestTtsV2SpeechSynthesizer.java index 0320038..c6858cf 100644 --- a/src/test/java/com/alibaba/dashscope/TestTtsV2SpeechSynthesizer.java +++ b/src/test/java/com/alibaba/dashscope/TestTtsV2SpeechSynthesizer.java @@ -5,6 +5,7 @@ import static org.junit.Assert.assertEquals; import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult; +import com.alibaba.dashscope.audio.ttsv2.ParamHotFix; import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisAudioFormat; import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer; @@ -129,6 +130,16 @@ public void testStreamingCall() { // 获取 URL String url = mockServer.url("/binary").toString(); + ParamHotFix hotFix = new ParamHotFix(); + ArrayList pronunciations = new ArrayList<>(); + pronunciations.add(new ParamHotFix.PronunciationItem("今天", "jin1 tian1")); + pronunciations.add(new ParamHotFix.PronunciationItem("草地", "cao3 di4")); + hotFix.setPronunciation(pronunciations); + + ArrayList replaces = new ArrayList<>(); + replaces.add(new ParamHotFix.ReplaceItem("草地", "草弟")); + replaces.add(new ParamHotFix.ReplaceItem("惠州", "汇州")); + hotFix.setReplace(replaces); // 在真实世界中,你会在这里做 HTTP 请求,并得到响应 System.out.println("Mock Server is running at: " + url); @@ -138,6 +149,7 @@ public void testStreamingCall() { .model("cosyvoice-v1") .voice("longxiaochun") .format(SpeechSynthesisAudioFormat.MP3_16000HZ_MONO_128KBPS) + .hotFix(hotFix) .build(); SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback); synthesizer.setStartedTimeout(1000); From 9dc44b553302aae43b60106d618494de5f0bf0d1 Mon Sep 17 00:00:00 2001 From: xiongbinbin Date: Mon, 9 Feb 2026 17:15:19 +0800 Subject: [PATCH 57/64] release version 2.22.9 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 320ff31..a4025f3 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.8 + 2.22.9 8 From 1440ed5f6bd40c4d6eeb35b54d5ea5d8c349ab1f Mon Sep 17 00:00:00 2001 From: songguocola Date: Thu, 26 Feb 2026 17:42:58 +0800 Subject: [PATCH 58/64] feat(model/cosyvoice): add custom input by parameters (#193) --- .../audio/ttsv2/enrollment/VoiceEnrollmentParam.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java index fa3febb..e9a0036 100644 --- a/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java +++ b/src/main/java/com/alibaba/dashscope/audio/ttsv2/enrollment/VoiceEnrollmentParam.java @@ -55,6 +55,15 @@ public JsonObject getInput() { if (maxPromptAudioLength > 0) { input.addProperty("max_prompt_audio_length", maxPromptAudioLength); } + // add parameters into input + if (parameters != null) { + for (String key : parameters.keySet()) { + Object value = parameters.get(key); + if (key != null && value != null) { + input.add(key, JsonUtils.toJsonElement(value)); + } + } + } break; case LIST: input.addProperty(ApiKeywords.ACTION, operationType.getValue()); From 9e55127433fd3d787865bad8dd5b346cfc8ada0b Mon Sep 17 00:00:00 2001 From: xiongbinbin Date: Tue, 3 Mar 2026 09:58:12 +0800 Subject: [PATCH 59/64] release version 2.22.10 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a4025f3..106ee2c 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.9 + 2.22.10 8 From 1a4170bbd4b071eda3d59a45cc8d8d0ba942c1cc Mon Sep 17 00:00:00 2001 From: xiongbinbin Date: Tue, 10 Mar 2026 11:01:45 +0800 Subject: [PATCH 60/64] Usage add parameters prompt_tokens_details.cachedTokens --- .../MultiModalConversationTokensDetails.java | 3 +++ .../multimodalconversation/MultiModalConversationUsage.java | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationTokensDetails.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationTokensDetails.java index 702dcb1..94cffda 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationTokensDetails.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationTokensDetails.java @@ -19,4 +19,7 @@ public class MultiModalConversationTokensDetails { @SerializedName("reasoning_tokens") private Integer reasoningTokens; + + @SerializedName("cached_tokens") + private Integer cachedTokens; } diff --git a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationUsage.java b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationUsage.java index b405065..0f9f1e2 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationUsage.java +++ b/src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationUsage.java @@ -41,6 +41,9 @@ public class MultiModalConversationUsage { @SerializedName("output_tokens_details") private MultiModalConversationTokensDetails outputTokensDetails; + @SerializedName("prompt_tokens_details") + private MultiModalConversationTokensDetails promptTokensDetails; + @SerializedName("characters") private Integer characters; } From d469fa987e0cfce67dcef36f0f04f4a7ea48b8c6 Mon Sep 17 00:00:00 2001 From: xiongbinbin Date: Wed, 11 Mar 2026 18:37:41 +0800 Subject: [PATCH 61/64] release version 2.22.11 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 106ee2c..211dbd3 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.10 + 2.22.11 8 From a04ac72ddbdf7e2245c5dfb9d567b37eef5a70f5 Mon Sep 17 00:00:00 2001 From: x-zm <602187256@qq.com> Date: Thu, 12 Mar 2026 10:04:33 +0800 Subject: [PATCH 62/64] support wan 2.7 (#195) * support wan 2.7 * lint commit --------- Co-authored-by: mose-x.zm --- samples/VideoSynthesisUsage.java | 87 +++++- .../imagegeneration/ImageGenerationParam.java | 11 +- .../aigc/videosynthesis/VideoSynthesis.java | 12 + .../videosynthesis/VideoSynthesisParam.java | 255 ++++++++++++++++-- .../alibaba/dashscope/utils/ApiKeywords.java | 8 + 5 files changed, 344 insertions(+), 29 deletions(-) diff --git a/samples/VideoSynthesisUsage.java b/samples/VideoSynthesisUsage.java index d6cb21e..3d33369 100644 --- a/samples/VideoSynthesisUsage.java +++ b/samples/VideoSynthesisUsage.java @@ -18,8 +18,8 @@ public class VideoSynthesisUsage { */ public static void basicCall() throws ApiException, NoApiKeyException, InputRequiredException { VideoSynthesis vs = new VideoSynthesis(); - List referenceVideoUrls = new ArrayList<>(); - referenceVideoUrls.add("https://cdn.wanx.aliyuncs.com/wanx/1014827220770308/upload-video-cut/cda0f4dc063ec258184263691558af36.mp4"); + List referenceUrls = new ArrayList<>(); + referenceUrls.add("https://cdn.wanx.aliyuncs.com/wanx/1014827220770308/upload-video-cut/cda0f4dc063ec258184263691558af36.mp4"); List referenceVideoDescription = new ArrayList<>(); referenceVideoDescription.add("这段视频展示一位年轻女性()身着灰色长袖上衣与裤子,乌黑长发垂落,面容清秀。她先低头后抬头,目光侧移,继而转身背对再面向镜头,动作流畅自然。背景为素净灰色墙面,环境简约无装饰。镜头由面部特写缓缓拉远至全身,光影柔和,突出人物形态与情绪。"); @@ -27,7 +27,7 @@ public static void basicCall() throws ApiException, NoApiKeyException, InputRequ VideoSynthesisParam.builder() .model("wan2.6-r2v") .prompt(" character1 站在海边,吹着海风,夕阳西下,阳光洒在她的脸上") - .referenceVideoUrls(referenceVideoUrls) + .referenceUrls(referenceUrls) .referenceVideoDescription(referenceVideoDescription) .shotType(VideoSynthesis.ShotType.MULTI) .watermark(Boolean.TRUE) @@ -39,6 +39,83 @@ public static void basicCall() throws ApiException, NoApiKeyException, InputRequ VideoSynthesisResult result = vs.call(param); System.out.println(result); } + /** + * Create a video compositing task and wait for the task to complete. + */ + public static void basicCallI2V27() throws ApiException, NoApiKeyException, InputRequiredException { + VideoSynthesis vs = new VideoSynthesis(); + final String prompt = "一幅都市奇幻艺术的场景。一个充满动感的涂鸦艺术角色。一个由喷漆所画成的少年,正从一面混凝土墙上活过来。他一边用极快的语速演唱一首英文rap,一边摆着一个经典的、充满活力的说唱歌手姿势。场景设定在夜晚一个充满都市感的铁路桥下。灯光来自一盏孤零零的街灯,营造出电影般的氛围,充满高能量和惊人的细节。视频的音频部分完全由他的rap构成,没有其他对话或杂音。"; + final String negativePrompt = "ugly, bad anatomy"; + List media = new ArrayList(){{ + add(VideoSynthesisParam.Media.builder() + .url("https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250925/wpimhv/rap.png") + .type(VideoSynthesis.MediaType.FIRST_CLIP) + .build()); + add(VideoSynthesisParam.Media.builder() + .url("https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20250925/ozwpvi/rap.mp3") + .type(VideoSynthesis.MediaType.DRIVING_AUDIO) + .build()); + }}; + VideoSynthesisParam param = + VideoSynthesisParam.builder() + .model("wan2.7-i2v") + .prompt(prompt) + .media(media) + .watermark(Boolean.TRUE) + .duration(10) + .negativePrompt(negativePrompt) + .size("1280*720") + .build(); + VideoSynthesisResult result = vs.call(param); + System.out.println(result); + } + /** + * Create a video compositing task and wait for the task to complete. + */ + public static void basicCallR2V27() throws ApiException, NoApiKeyException, InputRequiredException { + VideoSynthesis vs = new VideoSynthesis(); + final String prompt = "一幅都市奇幻艺术的场景。一个充满动感的涂鸦艺术角色。一个由喷漆所画成的少年,正从一面混凝土墙上活过来。他一边用极快的语速演唱一首英文rap,一边摆着一个经典的、充满活力的说唱歌手姿势。场景设定在夜晚一个充满都市感的铁路桥下。灯光来自一盏孤零零的街灯,营造出电影般的氛围,充满高能量和惊人的细节。视频的音频部分完全由他的rap构成,没有其他对话或杂音。"; + final String negativePrompt = "ugly, bad anatomy"; + List media = new ArrayList(){{ + add(VideoSynthesisParam.Media.builder() + .url("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/image/res240_269.jpg") + .type(VideoSynthesis.MediaType.REFERENCE_IMAGE) + .build()); + add(VideoSynthesisParam.Media.builder() + .url("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/image/man_5K_7_7K_18_4M.JPG") + .referenceVoice("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/audio/2s.wav") + .type(VideoSynthesis.MediaType.REFERENCE_IMAGE) + .build()); + add(VideoSynthesisParam.Media.builder() + .url("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/cast/100M.mov") + .referenceVoice("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/audio/mp3_1s.mp3") + .type(VideoSynthesis.MediaType.REFERENCE_VIDEO) + .build()); + add(VideoSynthesisParam.Media.builder() + .url("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/cast/29_99s.mp4") + .referenceDescription("这是一个身穿蓝衣的男子,他有着浓密的络腮胡") + .type(VideoSynthesis.MediaType.REFERENCE_VIDEO) + .build()); + add(VideoSynthesisParam.Media.builder() + .url("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/video/resources/cast/cat_127.mp4") + .referenceVoice("https://test-data-center.oss-accelerate.aliyuncs.com/wanx/audio/wav_10s.wav") + .referenceDescription("这是一只毛绒小猫,它正在对着镜头微笑") + .type(VideoSynthesis.MediaType.REFERENCE_VIDEO) + .build()); + }}; + VideoSynthesisParam param = + VideoSynthesisParam.builder() + .model("wan2.7-r2v") + .prompt(prompt) + .media(media) + .watermark(Boolean.TRUE) + .duration(10) + .negativePrompt(negativePrompt) + .size("1280*720") + .build(); + VideoSynthesisResult result = vs.call(param); + System.out.println(result); + } /** * List all tasks. @@ -63,7 +140,9 @@ public static void fetchTask(String taskId) throws ApiException, NoApiKeyExcepti public static void main(String[] args) { try { - basicCall(); +// basicCall(); +// basicCallI2V27(); + basicCallR2V27(); // listTask(); // fetchTask("b451725d-c48f-4f08-9d26-xxx-xxx"); } catch (ApiException | NoApiKeyException | InputRequiredException e) { diff --git a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java index b727c85..adc5239 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/imagegeneration/ImageGenerationParam.java @@ -11,7 +11,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import lombok.*; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.Singular; import lombok.experimental.SuperBuilder; @EqualsAndHashCode(callSuper = true) @@ -73,6 +76,8 @@ public class ImageGenerationParam extends HalfDuplexServiceParam { private Integer maxImages; + private Boolean enableSequential; + @Override public JsonObject getHttpBody() { JsonObject requestObject = new JsonObject(); @@ -149,6 +154,10 @@ public Map getParameters() { params.put("max_images", maxImages); } + if (enableSequential != null) { + params.put("enable_sequential", enableSequential); + } + params.putAll(parameters); return params; } diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java index 5749a63..570d2e1 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java @@ -57,6 +57,18 @@ public static class Resolution { public static class ShotType { public static final String MULTI = "multi"; public static final String SINGLE = "single"; + public static final String AUTO = "auto"; + } + + public static class MediaType { + public static final String FIRST_FRAME = "first_frame"; + public static final String LAST_FRAME = "last_frame"; + public static final String REFERENCE_IMAGE = "reference_image"; + public static final String REFERENCE_VIDEO = "reference_video"; + public static final String REFERENCE_VOICE = "reference_voice"; + public static final String VIDEO = "video"; + public static final String FIRST_CLIP = "first_clip"; + public static final String DRIVING_AUDIO = "driving_audio"; } /** diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java index 146bc10..9f6412a 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java @@ -16,6 +16,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.*; +import java.util.stream.Collectors; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; @@ -27,6 +29,15 @@ @SuperBuilder public class VideoSynthesisParam extends HalfDuplexServiceParam { + @Data + @SuperBuilder + public static class Media { + @Builder.Default private String url = null; + @Builder.Default private String type = null; + @Builder.Default private String referenceVoice = null; + @Builder.Default private String referenceDescription = null; + } + @Builder.Default private Map inputChecks = new HashMap<>(); @Builder.Default private String size = null; @@ -58,6 +69,15 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { /** list of character reference video file urls uploaded by the user */ @Builder.Default private List referenceVideoUrls = null; + /** list of character reference file urls uploaded by the user */ + @Builder.Default private List referenceUrls = null; + + /** list of character reference file url uploaded by the user */ + @Builder.Default private String referenceUrl = null; + + /** list of media file urls uploaded by the user */ + @Builder.Default private List media = null; + /** * For the description information of the picture and sound of the reference video, corresponding * to ref video, it needs to be in the order of the url. If the quantity is different, an error @@ -97,6 +117,8 @@ public class VideoSynthesisParam extends HalfDuplexServiceParam { /** The enable_overlays parameter. */ @Builder.Default private Boolean enableOverlays = null; + @Builder.Default private String ratio = null; + /** The inputs of the model. */ @Override public JsonObject getInput() { @@ -147,6 +169,18 @@ public JsonObject getInput() { jsonObject.add(REFERENCE_VIDEO_DESCRIPTION, JsonUtils.toJsonArray(referenceVideoDescription)); } + if (referenceUrls != null && !referenceUrls.isEmpty()) { + jsonObject.add(REFERENCE_URLS, JsonUtils.toJsonArray(referenceUrls)); + } + + if (referenceUrl != null && !referenceUrl.isEmpty()) { + jsonObject.addProperty(REFERENCE_URL, referenceUrl); + } + + if (media != null && !media.isEmpty()) { + jsonObject.add(MEDIA_URLS, JsonUtils.toJsonArray(media)); + } + if (extraInputs != null && !extraInputs.isEmpty()) { JsonObject extraInputsJsonObject = JsonUtils.parametersToJsonObject(extraInputs); JsonUtils.merge(jsonObject, extraInputsJsonObject); @@ -193,6 +227,9 @@ public Map getParameters() { if (enableOverlays != null) { params.put(ENABLE_OVERLAYS, enableOverlays); } + if (ratio != null) { + params.put(RATIO, ratio); + } params.putAll(super.getParameters()); return params; @@ -223,40 +260,210 @@ public ByteBuffer getBinaryData() { public void validate() throws InputRequiredException {} public void checkAndUpload() throws NoApiKeyException, UploadFileException { - Map inputChecks = new HashMap<>(); - inputChecks.put(IMG_URL, this.imgUrl); - inputChecks.put(AUDIO_URL, this.audioUrl); - inputChecks.put(FIRST_FRAME_URL, this.firstFrameUrl); - inputChecks.put(LAST_FRAME_URL, this.lastFrameUrl); - inputChecks.put(HEAD_FRAME, this.headFrame); - inputChecks.put(TAIL_FRAME, this.tailFrame); - int rvs = 0; + class UploadTaskResult { + final String key; + final String newUrl; + final boolean uploaded; + + UploadTaskResult(String key, String newUrl, boolean uploaded) { + this.key = key; + this.newUrl = newUrl; + this.uploaded = uploaded; + } + } + + List> futures = new ArrayList<>(); + + class TaskItem { + final String keyPrefix; + final String keyItemPrefix; + final String value; + final int index; + + TaskItem(String keyPrefix, String keyItemPrefix, String value, int index) { + this.keyPrefix = keyPrefix; + this.keyItemPrefix = keyItemPrefix; + this.value = value; + this.index = index; + } + + TaskItem(String keyPrefix, String value, int index) { + this.keyPrefix = keyPrefix; + this.keyItemPrefix = ""; + this.value = value; + this.index = index; + } + + TaskItem(String keyPrefix, String value) { + this.keyPrefix = keyPrefix; + this.keyItemPrefix = ""; + this.value = value; + this.index = -1; + } + + String getFullKey() { + return index >= 0 ? keyPrefix + keyItemPrefix + "[" + index + "]" : keyPrefix; + } + } + + List itemsToProcess = new ArrayList<>(); + + if (this.imgUrl != null) itemsToProcess.add(new TaskItem(IMG_URL, this.imgUrl)); + if (this.audioUrl != null) itemsToProcess.add(new TaskItem(AUDIO_URL, this.audioUrl)); + if (this.firstFrameUrl != null) + itemsToProcess.add(new TaskItem(FIRST_FRAME_URL, this.firstFrameUrl)); + if (this.lastFrameUrl != null) + itemsToProcess.add(new TaskItem(LAST_FRAME_URL, this.lastFrameUrl)); + if (this.headFrame != null) itemsToProcess.add(new TaskItem(HEAD_FRAME, this.headFrame)); + if (this.tailFrame != null) itemsToProcess.add(new TaskItem(TAIL_FRAME, this.tailFrame)); + if (this.referenceUrl != null) + itemsToProcess.add(new TaskItem(REFERENCE_URL, this.referenceUrl)); + if (this.referenceVideoUrls != null) { - rvs = this.referenceVideoUrls.size(); - for (int i = 0; i < rvs; i++) { - inputChecks.put(REFERENCE_VIDEO_URLS + "[" + i + "]", this.referenceVideoUrls.get(i)); + for (int i = 0; i < this.referenceVideoUrls.size(); i++) { + String url = this.referenceVideoUrls.get(i); + if (url != null) { + itemsToProcess.add(new TaskItem(REFERENCE_VIDEO_URLS, url, i)); + } + } + } + + if (this.referenceUrls != null) { + for (int i = 0; i < this.referenceUrls.size(); i++) { + String url = this.referenceUrls.get(i); + if (url != null) { + itemsToProcess.add(new TaskItem(REFERENCE_URLS, url, i)); + } + } + } + + if (this.media != null) { + for (int i = 0; i < this.media.size(); i++) { + Media media = this.media.get(i); + if (media != null) { + if (media.getUrl() != null) { + itemsToProcess.add(new TaskItem(MEDIA_URLS, "_URL", media.getUrl(), i)); + } + if (media.getReferenceVoice() != null) { + itemsToProcess.add( + new TaskItem(MEDIA_URLS, "_REFERENCE_VOICE", media.getReferenceVoice(), i)); + } + } + } + } + + if (itemsToProcess.isEmpty()) { + return; + } + + ExecutorService executor = Executors.newFixedThreadPool(5); + try { + for (TaskItem item : itemsToProcess) { + CompletableFuture future = + CompletableFuture.supplyAsync( + () -> { + Map singleCheckMap = new HashMap<>(); + String fullKey = item.getFullKey(); + singleCheckMap.put(fullKey, item.value); + + boolean isUploaded; + try { + isUploaded = + PreprocessInputImage.checkAndUploadImage( + getModel(), singleCheckMap, getApiKey()); + } catch (NoApiKeyException | UploadFileException e) { + throw new RuntimeException(e); + } + + return new UploadTaskResult(fullKey, singleCheckMap.get(fullKey), isUploaded); + }, + executor); + futures.add(future); + } + } finally { + executor.shutdown(); + try { + if (!executor.awaitTermination(60, TimeUnit.SECONDS)) { + executor.shutdownNow(); + } + } catch (InterruptedException e) { + executor.shutdownNow(); + Thread.currentThread().interrupt(); } } - boolean isUpload = - PreprocessInputImage.checkAndUploadImage(getModel(), inputChecks, getApiKey()); + List results = new ArrayList<>(); + boolean globalIsUpload = false; + + try { + for (CompletableFuture future : futures) { + UploadTaskResult result = future.get(); + results.add(result); + if (result.uploaded) { + globalIsUpload = true; + } + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Upload process interrupted", e); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + if (cause instanceof NoApiKeyException) { + throw (NoApiKeyException) cause; + } else if (cause instanceof UploadFileException) { + throw (UploadFileException) cause; + } else { + throw new RuntimeException("Upload failed", cause); + } + } - if (isUpload) { + if (globalIsUpload) { this.putHeader("X-DashScope-OssResourceResolve", "enable"); - this.imgUrl = inputChecks.get(IMG_URL); - this.audioUrl = inputChecks.get(AUDIO_URL); - this.firstFrameUrl = inputChecks.get(FIRST_FRAME_URL); - this.lastFrameUrl = inputChecks.get(LAST_FRAME_URL); - this.headFrame = inputChecks.get(HEAD_FRAME); - this.tailFrame = inputChecks.get(TAIL_FRAME); - if (rvs > 0) { - List newVideos = new ArrayList<>(); - for (int i = 0; i < rvs; i++) { - newVideos.add(inputChecks.get(REFERENCE_VIDEO_URLS + "[" + i + "]")); + Map resultMap = + results.stream().collect(Collectors.toMap(r -> r.key, r -> r.newUrl)); + + if (resultMap.containsKey(IMG_URL)) this.imgUrl = resultMap.get(IMG_URL); + if (resultMap.containsKey(AUDIO_URL)) this.audioUrl = resultMap.get(AUDIO_URL); + if (resultMap.containsKey(FIRST_FRAME_URL)) + this.firstFrameUrl = resultMap.get(FIRST_FRAME_URL); + if (resultMap.containsKey(LAST_FRAME_URL)) this.lastFrameUrl = resultMap.get(LAST_FRAME_URL); + if (resultMap.containsKey(HEAD_FRAME)) this.headFrame = resultMap.get(HEAD_FRAME); + if (resultMap.containsKey(TAIL_FRAME)) this.tailFrame = resultMap.get(TAIL_FRAME); + if (resultMap.containsKey(REFERENCE_URL)) this.referenceUrl = resultMap.get(REFERENCE_URL); + + if (this.referenceVideoUrls != null && !this.referenceVideoUrls.isEmpty()) { + List newVideos = new ArrayList<>(this.referenceVideoUrls.size()); + for (int i = 0; i < this.referenceVideoUrls.size(); i++) { + String key = REFERENCE_VIDEO_URLS + "[" + i + "]"; + newVideos.add(resultMap.getOrDefault(key, this.referenceVideoUrls.get(i))); } this.referenceVideoUrls = newVideos; } + + if (this.referenceUrls != null && !this.referenceUrls.isEmpty()) { + List newRefs = new ArrayList<>(this.referenceUrls.size()); + for (int i = 0; i < this.referenceUrls.size(); i++) { + String key = REFERENCE_URLS + "[" + i + "]"; + newRefs.add(resultMap.getOrDefault(key, this.referenceUrls.get(i))); + } + this.referenceUrls = newRefs; + } + + if (this.media != null && !this.media.isEmpty()) { + for (int i = 0; i < this.media.size(); i++) { + Media mediaItem = this.media.get(i); + + String urlKey = MEDIA_URLS + "_URL[" + i + "]"; + String voiceKey = MEDIA_URLS + "_REFERENCE_VOICE[" + i + "]"; + if (resultMap.containsKey(urlKey)) { + mediaItem.setUrl(resultMap.get(urlKey)); + } + if (resultMap.containsKey(voiceKey)) { + mediaItem.setReferenceVoice(resultMap.get(voiceKey)); + } + } + } } } } diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index 50eaba7..9d42153 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -194,6 +194,14 @@ public class ApiKeywords { public static final String REFERENCE_VIDEO_URLS = "reference_video_urls"; + public static final String REFERENCE_URLS = "reference_urls"; + + public static final String MEDIA_URLS = "media"; + + public static final String REFERENCE_URL = "reference_url"; + + public static final String RATIO = "ratio"; + public static final String REFERENCE_VIDEO_DESCRIPTION = "reference_video_description"; public static final String SHOT_TYPE = "shot_type"; From 6553761ffd51bfc6dbe7ae2ca21332805bd28488 Mon Sep 17 00:00:00 2001 From: x-zm <602187256@qq.com> Date: Fri, 13 Mar 2026 10:41:53 +0800 Subject: [PATCH 63/64] Fix/add miss wan27 params (#196) * add miss params audio_setting * add miss params audio_setting * Add empty conditions --------- Co-authored-by: mose-x.zm --- .../aigc/videosynthesis/VideoSynthesis.java | 6 ++++++ .../aigc/videosynthesis/VideoSynthesisParam.java | 13 +++++++++---- .../com/alibaba/dashscope/utils/ApiKeywords.java | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java index 570d2e1..8b3aca2 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesis.java @@ -71,6 +71,12 @@ public static class MediaType { public static final String DRIVING_AUDIO = "driving_audio"; } + public static class AudioSetting { + public static final String AUTO = "auto"; + public static final String ORIGIN = "origin"; + public static final String NO_AUDIO = "no_audio"; + } + /** * Create ApiServiceOption * diff --git a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java index 9f6412a..5ec391f 100644 --- a/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java +++ b/src/main/java/com/alibaba/dashscope/aigc/videosynthesis/VideoSynthesisParam.java @@ -119,6 +119,8 @@ public static class Media { @Builder.Default private String ratio = null; + @Builder.Default private String audioSetting = null; + /** The inputs of the model. */ @Override public JsonObject getInput() { @@ -196,11 +198,11 @@ public Map getParameters() { params.put(DURATION, duration); } - if (size != null) { + if (size != null && !size.isEmpty()) { params.put(SIZE, size); } - if (resolution != null) { + if (resolution != null && !resolution.isEmpty()) { params.put(RESOLUTION, resolution); } @@ -221,15 +223,18 @@ public Map getParameters() { if (audio != null) { params.put(AUDIO, audio); } - if (shotType != null) { + if (shotType != null && !shotType.isEmpty()) { params.put(SHOT_TYPE, shotType); } if (enableOverlays != null) { params.put(ENABLE_OVERLAYS, enableOverlays); } - if (ratio != null) { + if (ratio != null && !ratio.isEmpty()) { params.put(RATIO, ratio); } + if (audioSetting != null && !audioSetting.isEmpty()) { + params.put(AUDIO_SETTING, audioSetting); + } params.putAll(super.getParameters()); return params; diff --git a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java index 9d42153..bc60736 100644 --- a/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java +++ b/src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java @@ -202,6 +202,8 @@ public class ApiKeywords { public static final String RATIO = "ratio"; + public static final String AUDIO_SETTING = "audio_setting"; + public static final String REFERENCE_VIDEO_DESCRIPTION = "reference_video_description"; public static final String SHOT_TYPE = "shot_type"; From 98743c6a79acd1271e2b2fcda208ad65a4ddfcfc Mon Sep 17 00:00:00 2001 From: xiongbinbin Date: Mon, 16 Mar 2026 10:28:20 +0800 Subject: [PATCH 64/64] release version 2.22.12 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 211dbd3..b14ddd9 100644 --- a/pom.xml +++ b/pom.xml @@ -40,7 +40,7 @@ DashScope Java SDK com.alibaba dashscope-sdk-java - 2.22.11 + 2.22.12 8