diff --git a/README.md b/README.md
index 764ce03..aa13623 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 ![](https://img.shields.io/github/forks/llm-red-team/glm-free-api.svg)
 ![](https://img.shields.io/docker/pulls/vinlic/glm-free-api.svg)
 
-Supports high-speed streaming output, multi-turn dialogue, agent dialogue, video generation, AI drawing, web search, long-document interpretation, and image parsing. Zero-configuration deployment, multi-token support, automatic cleanup of conversation traces.
+Supports GLM-4-Plus high-speed streaming output, multi-turn dialogue, agent dialogue, the Zero thinking/reasoning model, video generation, AI drawing, web search, long-document interpretation, and image parsing. Zero-configuration deployment, multi-token support, automatic cleanup of conversation traces.
 
 Fully compatible with the ChatGPT API.
 
@@ -286,8 +286,10 @@ Authorization: Bearer [refresh_token]
 Request data:
 ```json
 {
-    // If using an agent, fill in the agent ID here; otherwise any value works
-    "model": "glm4",
+    // Default model: glm-4-plus
+    // Zero thinking/reasoning models: glm-4-zero / glm-4-think
+    // If using an agent, fill in the agent ID here
+    "model": "glm-4-plus",
     // Multi-turn dialogue is currently implemented via message merging; in some scenarios this may degrade capability, and it is limited by the maximum token count of a single turn
     // For a native multi-turn dialogue experience, pass the id returned by the first-turn message to continue the context
     // "conversation_id": "65f6c28546bae1f0fbb532de",
@@ -307,7 +309,7 @@ Authorization: Bearer [refresh_token]
 {
     // For a native multi-turn dialogue experience, pass this id as conversation_id in the next turn to continue the context
     "id": "65f6c28546bae1f0fbb532de",
-    "model": "glm4",
+    "model": "glm-4",
     "object": "chat.completion",
     "choices": [
         {
@@ -432,7 +434,7 @@
 ```json
 {
     // If using an agent, fill in the agent ID here; otherwise any value works
-    "model": "glm4",
+    "model": "glm-4",
     "messages": [
         {
             "role": "user",
@@ -459,7 +461,7 @@
 ```json
 {
     "id": "cnmuo7mcp7f9hjcmihn0",
-    "model": "glm4",
+    "model": "glm-4",
     "object": "chat.completion",
     "choices": [
         {
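For illustration, a request exercising the new model names could look like the sketch below. This is not part of the patch; the base URL and port are assumptions for a typical local deployment of this service, and `[refresh_token]` is a placeholder as in the README.

```ts
// Sketch only: call the ChatGPT-compatible endpoint with the new Zero reasoning model.
// Assumes the service runs locally on port 8000; adjust to your deployment.
const res = await fetch("http://127.0.0.1:8000/v1/chat/completions", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Bearer [refresh_token]", // placeholder token
  },
  body: JSON.stringify({
    model: "glm-4-zero", // or "glm-4-plus" / "glm-4-think" / an agent ID
    messages: [{ role: "user", content: "Hello" }],
  }),
});
console.log((await res.json()).choices[0].message.content);
```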
diff --git a/README_EN.md b/README_EN.md
index 2f5b6e9..ab5fd04 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -289,8 +289,10 @@ Authorization: Bearer [refresh_token]
 Request data:
 ```json
 {
-    // Except using the Agent to fill the ID, fill in the model name as you like.
-    "model": "glm4",
+    // Default model: glm-4-plus
+    // Zero thinking/reasoning models: glm-4-zero / glm-4-think
+    // If using the Agent, fill in the Agent ID here
+    "model": "glm-4-plus",
     // Currently, multi-round conversations are implemented via message merging, which in some scenarios may degrade capability and is limited by the maximum number of tokens in a single round.
     // If you want a native multi-round dialogue experience, you can pass in the id obtained from the previous round of messages to pick up the context
     // "conversation_id": "65f6c28546bae1f0fbb532de",
@@ -309,7 +311,7 @@ Response data:
 ```json
 {
     "id": "65f6c28546bae1f0fbb532de",
-    "model": "glm4",
+    "model": "glm-4",
     "object": "chat.completion",
     "choices": [
         {
@@ -434,7 +436,7 @@ Request data:
 ```json
 {
     // If using the Agent, fill in the Agent ID here; otherwise any value works
-    "model": "glm4",
+    "model": "glm-4",
     "messages": [
         {
             "role": "user",
@@ -461,7 +463,7 @@ Response data:
 ```json
 {
     "id": "cnmuo7mcp7f9hjcmihn0",
-    "model": "glm4",
+    "model": "glm-4",
     "object": "chat.completion",
     "choices": [
         {
diff --git a/package.json b/package.json
index fc0170b..cfd1e10 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "glm-free-api",
-  "version": "0.0.33",
+  "version": "0.0.34",
   "description": "GLM Free API Server",
   "type": "module",
   "main": "dist/index.js",
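The controller changes that follow route the incoming `model` string to an agent ID: a lowercase alphanumeric value of 24+ characters is treated as an agent ID itself, and any model name containing `zero` or `think` is redirected to the Zero reasoning agent. A condensed, standalone sketch of that rule (the constant is copied from the patch; the helper name `resolveAssistantId` is mine, for illustration):

```ts
// Agent ID of the Zero reasoning model (from the patch).
const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";

// Mirrors the routing added to createCompletion/createCompletionStream:
// returns the agent ID to send upstream, or undefined for the default model.
function resolveAssistantId(model: string): string | undefined {
  let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
  if (model.indexOf("think") != -1 || model.indexOf("zero") != -1)
    assistantId = ZERO_ASSISTANT_ID;
  return assistantId;
}

// resolveAssistantId("glm-4-zero")              -> ZERO_ASSISTANT_ID
// resolveAssistantId("65940acff94777010aa6b796") -> "65940acff94777010aa6b796"
// resolveAssistantId("glm-4-plus")              -> undefined (default model)
```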
diff --git a/src/api/controllers/chat.ts b/src/api/controllers/chat.ts
index 32bb807..7a3dc81 100644
--- a/src/api/controllers/chat.ts
+++ b/src/api/controllers/chat.ts
@@ -17,6 +17,8 @@ import util from "@/lib/util.ts";
 const MODEL_NAME = "glm";
 // Default agent ID (GLM4)
 const DEFAULT_ASSISTANT_ID = "65940acff94777010aa6b796";
+// Agent ID of the Zero reasoning model
+const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
 // access_token validity period
 const ACCESS_TOKEN_EXPIRES = 3600;
 // Maximum retry count
@@ -165,13 +167,13 @@ async function removeConversation(
  *
  * @param messages GPT-series message format; provide the full context for multi-turn dialogue
  * @param refreshToken refresh_token used to refresh the access_token
- * @param assistantId Agent ID; defaults to the original GLM4
+ * @param model Model name or agent ID; defaults to the original GLM4
  * @param retryCount Retry count
  */
 async function createCompletion(
   messages: any[],
   refreshToken: string,
-  assistantId = DEFAULT_ASSISTANT_ID,
+  model = MODEL_NAME,
   refConvId = "",
   retryCount = 0
 ) {
@@ -189,6 +191,13 @@
     // Reset the reference if the referenced conversation ID is invalid
     if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";
 
+    let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
+
+    if (model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
+      assistantId = ZERO_ASSISTANT_ID;
+      logger.info('Using thinking model');
+    }
+
     // Request the stream
     const token = await acquireToken(refreshToken);
     const result = await axios.post(
@@ -200,8 +209,11 @@
         meta_data: {
           channel: "",
           draft_id: "",
+          if_plus_model: true,
           input_question_type: "xxxx",
           is_test: false,
+          platform: "pc",
+          quote_log_id: ""
         },
       },
       {
@@ -231,7 +243,7 @@
     const streamStartTime = util.timestamp();
     // Receive the stream as output text
-    const answer = await receiveStream(result.data);
+    const answer = await receiveStream(model, result.data);
     logger.success(
       `Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
     );
@@ -251,7 +263,7 @@
       return createCompletion(
         messages,
         refreshToken,
-        assistantId,
+        model,
         refConvId,
         retryCount + 1
       );
@@ -266,13 +278,13 @@
  *
  * @param messages GPT-series message format; provide the full context for multi-turn dialogue
  * @param refreshToken refresh_token used to refresh the access_token
- * @param assistantId Agent ID; defaults to the original GLM4
+ * @param model Model name or agent ID; defaults to the original GLM4
  * @param retryCount Retry count
  */
 async function createCompletionStream(
   messages: any[],
   refreshToken: string,
-  assistantId = DEFAULT_ASSISTANT_ID,
+  model = MODEL_NAME,
   refConvId = "",
   retryCount = 0
 ) {
@@ -290,6 +302,13 @@
     // Reset the reference if the referenced conversation ID is invalid
     if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";
 
+    let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
+
+    if (model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
+      assistantId = ZERO_ASSISTANT_ID;
+      logger.info('Using thinking model');
+    }
+
     // Request the stream
     const token = await acquireToken(refreshToken);
     const result = await axios.post(
@@ -301,8 +320,11 @@
         meta_data: {
           channel: "",
           draft_id: "",
+          if_plus_model: true,
           input_question_type: "xxxx",
           is_test: false,
+          platform: "pc",
+          quote_log_id: ""
         },
       },
       {
@@ -354,7 +376,7 @@
     const streamStartTime = util.timestamp();
     // Create a transform stream that converts the message format into a GPT-compatible one
-    return createTransStream(result.data, (convId: string) => {
+    return createTransStream(model, result.data, (convId: string) => {
       logger.success(
         `Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
       );
@@ -372,7 +394,7 @@
       return createCompletionStream(
         messages,
         refreshToken,
-        assistantId,
+        model,
         refConvId,
         retryCount + 1
       );
@@ -407,8 +429,11 @@ async function generateImages(
         meta_data: {
           channel: "",
           draft_id: "",
+          if_plus_model: true,
           input_question_type: "xxxx",
           is_test: false,
+          platform: "pc",
+          quote_log_id: ""
         },
       },
       {
@@ -904,14 +929,15 @@ function checkResult(result: AxiosResponse, refreshToken: string) {
 /**
  * Receive the complete message content from the stream
  *
+ * @param model Model name
  * @param stream Message stream
  */
-async function receiveStream(stream: any): Promise<any> {
+async function receiveStream(model: string, stream: any): Promise<any> {
   return new Promise((resolve, reject) => {
     // Initialize the message
     const data = {
       id: "",
-      model: MODEL_NAME,
+      model,
       object: "chat.completion",
       choices: [
         {
@@ -923,6 +949,8 @@
       usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
       created: util.unixTimestamp(),
     };
+    const isSilentModel = model.indexOf('silent') != -1;
+    let thinkingText = "";
     let toolCall = false;
     let codeGenerating = false;
     let textChunkLength = 0;
@@ -930,6 +958,7 @@
     let lastExecutionOutput = "";
     let textOffset = 0;
     let refContent = "";
+    logger.info(`Silent model: ${isSilentModel}`);
     const parser = createParser((event) => {
       try {
         if (event.type !== "event") return;
@@ -957,6 +986,7 @@
           textChunkLength = 0;
           innerStr += "\n";
         }
+
         if (type == "text") {
           if (toolCall) {
             innerStr += "\n";
             textOffset++;
             toolCall = false;
           }
           if (partStatus == "finish") textChunkLength = text.length;
           return innerStr + text;
-        } else if (
+        } else if (type == "text_thinking" && !isSilentModel) {
+          if (toolCall) {
+            innerStr += "\n";
+            textOffset++;
+            toolCall = false;
+          }
+          thinkingText = text;
+          return innerStr;
+        } else if (
           type == "quote_result" &&
           status == "finish" &&
           meta_data &&
-          _.isArray(meta_data.metadata_list)
+          _.isArray(meta_data.metadata_list) &&
+          !isSilentModel
         ) {
           refContent = meta_data.metadata_list.reduce((meta, v) => {
             return meta + `${v.title} - ${v.url}\n`;
           }, "");
@@ -1032,6 +1071,8 @@
           );
           data.choices[0].message.content += chunk;
         } else {
+          if (thinkingText)
+            data.choices[0].message.content = `[思考开始]\n${thinkingText}[思考结束]\n\n${data.choices[0].message.content}`;
           data.choices[0].message.content =
             data.choices[0].message.content.replace(
               /【\d+†(来源|源|source)】/g,
@@ -1059,18 +1100,22 @@
 /**
  * Convert the stream format into a GPT-compatible stream format
  *
+ * @param model Model name
  * @param stream Message stream
  * @param endCallback Callback invoked when the transfer ends
  */
-function createTransStream(stream: any, endCallback?: Function) {
+function createTransStream(model: string, stream: any, endCallback?: Function) {
   // Message creation time
   const created = util.unixTimestamp();
   // Create the transform stream
   const transStream = new PassThrough();
+  const isSilentModel = model.indexOf('silent') != -1;
   let content = "";
+  let thinking = false;
   let toolCall = false;
   let codeGenerating = false;
   let textChunkLength = 0;
+  let thinkingText = "";
   let codeTemp = "";
   let lastExecutionOutput = "";
   let textOffset = 0;
@@ -1078,7 +1123,7 @@ function createTransStream(stream: any, endCallback?: Function) {
   transStream.write(
     `data: ${JSON.stringify({
       id: "",
-      model: MODEL_NAME,
+      model,
       object: "chat.completion.chunk",
       choices: [
         {
@@ -1116,6 +1161,11 @@
           innerStr += "\n";
         }
         if (type == "text") {
+          if (thinking) {
+            innerStr += "[思考结束]\n\n";
+            textOffset = thinkingText.length + 8; // thinking text plus "[思考结束]\n\n"
+            thinking = false;
+          }
           if (toolCall) {
             innerStr += "\n";
             textOffset++;
             toolCall = false;
           }
           if (partStatus == "finish") textChunkLength = text.length;
           return innerStr + text;
+        } else if (type == "text_thinking" && !isSilentModel) {
+          if (!thinking) {
+            innerStr += "[思考开始]\n";
+            textOffset = 7; // length of "[思考开始]\n"
+            thinking = true;
+          }
+          if (toolCall) {
+            innerStr += "\n";
+            textOffset++;
+            toolCall = false;
+          }
+          if (partStatus == "finish") textChunkLength = text.length;
+          thinkingText += text.substring(thinkingText.length, text.length);
+          return innerStr + text;
         } else if (
           type == "quote_result" &&
           status == "finish" &&
           meta_data &&
-          _.isArray(meta_data.metadata_list)
+          _.isArray(meta_data.metadata_list) &&
+          !isSilentModel
         ) {
           const searchText = meta_data.metadata_list.reduce(
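As the `receiveStream` changes above show, for zero/think models the assembled `message.content` is prefixed with the thinking trace wrapped in `[思考开始]` / `[思考结束]` markers. A client that only wants the final answer could strip that prefix; a minimal sketch (the helper name is hypothetical, the marker format is taken from the patch):

```ts
// Strips the "[思考开始]\n...[思考结束]\n\n" prefix that zero/think models
// prepend to message.content, leaving only the final answer.
function stripThinking(content: string): string {
  return content.replace(/^\[思考开始\]\n[\s\S]*?\[思考结束\]\n\n/, "");
}
```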
diff --git a/src/api/routes/chat.ts b/src/api/routes/chat.ts
index 12782c0..88b1662 100644
--- a/src/api/routes/chat.ts
+++ b/src/api/routes/chat.ts
@@ -5,6 +5,9 @@ import Response from '@/lib/response/Response.ts';
 import chat from '@/api/controllers/chat.ts';
 import logger from '@/lib/logger.ts';
 
+// Agent ID of the Zero reasoning model
+const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
+
 export default {
 
     prefix: '/v1/chat',
@@ -21,15 +24,15 @@ export default {
             // Randomly pick a refresh_token
             const token = _.sample(tokens);
             const { model, conversation_id: convId, messages, stream } = request.body;
-            const assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined
+
             if (stream) {
-                const stream = await chat.createCompletionStream(messages, token, assistantId, convId);
+                const stream = await chat.createCompletionStream(messages, token, model, convId);
                 return new Response(stream, {
                     type: "text/event-stream"
                 });
             }
             else
-                return await chat.createCompletion(messages, token, assistantId, convId);
+                return await chat.createCompletion(messages, token, model, convId);
         }
     }
diff --git a/src/api/routes/models.ts b/src/api/routes/models.ts
index f776cb0..b013eb9 100644
--- a/src/api/routes/models.ts
+++ b/src/api/routes/models.ts
@@ -18,6 +18,11 @@ export default {
             "object": "model",
             "owned_by": "glm-free-api"
         },
+        {
+            "id": "glm-4-plus",
+            "object": "model",
+            "owned_by": "glm-free-api"
+        },
         {
             "id": "glm-4v",
             "object": "model",
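Note that the `silent` check in the controller is independent of the zero/think routing, so a model name containing both substrings selects the Zero agent while suppressing the thinking trace and `quote_result` citations. A hypothetical request body (the name `glm-4-zero-silent` is illustrative; only the substrings matter):

```ts
// "zero" routes to the Zero reasoning agent; "silent" sets isSilentModel = true,
// hiding the [思考开始]/[思考结束] trace and search citations in the output.
const requestBody = {
  model: "glm-4-zero-silent",
  messages: [{ role: "user", content: "Hello" }],
  stream: true,
};
```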