支持GLM-4-Plus以及Zero思考推理模型

2025-04-29 15:59:58 +08:00 · 2024-12-31 11:32:25 +08:00 · 2024-12-31 11:32:25 +08:00 · 719e3b682f
commit 719e3b682f
parent 57b042d187
6 changed files with 107 additions and 30 deletions
--- a/README.md
+++ b/README.md
@ -9,7 +9,7 @@
 ![](https://img.shields.io/github/forks/llm-red-team/glm-free-api.svg)
 ![](https://img.shields.io/docker/pulls/vinlic/glm-free-api.svg)

-支持高速流式输出、支持多轮对话、支持智能体对话、支持视频生成、支持AI绘图、支持联网搜索、支持长文档解读、支持图像解析，零配置部署，多路token支持，自动清理会话痕迹。
+支持GLM-4-Plus高速流式输出、支持多轮对话、支持智能体对话、支持Zero思考推理模型、支持视频生成、支持AI绘图、支持联网搜索、支持长文档解读、支持图像解析，零配置部署，多路token支持，自动清理会话痕迹。

 与ChatGPT接口完全兼容。

@ -286,8 +286,10 @@ Authorization: Bearer [refresh_token]
 请求数据：
 ```json
 {
-    // 如果使用智能体请填写智能体ID到此处，否则可以乱填
-    "model": "glm4",
+    // 默认模型：glm-4-plus
+    // zero思考推理模型：glm-4-zero / glm-4-think
+    // 如果使用智能体请填写智能体ID到此处
+    "model": "glm-4-plus",
    // 目前多轮对话基于消息合并实现，某些场景可能导致能力下降且受单轮最大token数限制
    // 如果您想获得原生的多轮对话体验，可以传入首轮消息获得的id，来接续上下文
    // "conversation_id": "65f6c28546bae1f0fbb532de",
@ -307,7 +309,7 @@ Authorization: Bearer [refresh_token]
 {
    // 如果想获得原生多轮对话体验，此id，你可以传入到下一轮对话的conversation_id来接续上下文
    "id": "65f6c28546bae1f0fbb532de",
-    "model": "glm4",
+    "model": "glm-4",
    "object": "chat.completion",
    "choices": [
        {
@ -432,7 +434,7 @@ Authorization: Bearer [refresh_token]
 ```json
 {
    // 如果使用智能体请填写智能体ID到此处，否则可以乱填
-    "model": "glm4",
+    "model": "glm-4",
    "messages": [
        {
            "role": "user",
@ -459,7 +461,7 @@ Authorization: Bearer [refresh_token]
 ```json
 {
    "id": "cnmuo7mcp7f9hjcmihn0",
-    "model": "glm4",
+    "model": "glm-4",
    "object": "chat.completion",
    "choices": [
        {
--- a/README_EN.md
+++ b/README_EN.md
@ -289,8 +289,10 @@ Authorization: Bearer [refresh_token]
 Request data:
 ```json
 {
-    // Except using the Agent to fill the ID, fill in the model name as you like.
-    "model": "glm4",
+    // Default model: glm-4-plus
+    // Zero reasoning (thinking) model: glm-4-zero / glm-4-think
+    // If using an Agent, fill in the Agent ID here
+    "model": "glm-4-plus",
    // Currently, multi-round conversations are implemented by merging messages, which may degrade capability in some scenarios and is limited by the maximum number of tokens in a single round.
    // If you want a native multi-round conversation experience, pass in the id returned by an earlier round to continue its context
    // "conversation_id": "65f6c28546bae1f0fbb532de",
@ -309,7 +311,7 @@ Response data：
 ```json
 {
    "id": "65f6c28546bae1f0fbb532de",
-    "model": "glm4",
+    "model": "glm-4",
    "object": "chat.completion",
    "choices": [
        {
@ -434,7 +436,7 @@ Request data:
 ```json
 {
    // If using an Agent, fill in the Agent ID here; otherwise any value will do
-    "model": "glm4",
+    "model": "glm-4",
    "messages": [
        {
            "role": "user",
@ -461,7 +463,7 @@ Response data:
 ```json
 {
    "id": "cnmuo7mcp7f9hjcmihn0",
-    "model": "glm4",
+    "model": "glm-4",
    "object": "chat.completion",
    "choices": [
        {
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "glm-free-api",
-  "version": "0.0.33",
+  "version": "0.0.34",
  "description": "GLM Free API Server",
  "type": "module",
  "main": "dist/index.js",
--- a/src/api/controllers/chat.ts
+++ b/src/api/controllers/chat.ts
@ -17,6 +17,8 @@ import util from "@/lib/util.ts";
 const MODEL_NAME = "glm";
 // 默认的智能体ID，GLM4
 const DEFAULT_ASSISTANT_ID = "65940acff94777010aa6b796";
+// zero推理模型智能体ID
+const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
 // access_token有效期
 const ACCESS_TOKEN_EXPIRES = 3600;
 // 最大重试次数
@ -165,13 +167,13 @@ async function removeConversation(
 *
 * @param messages 参考gpt系列消息格式，多轮对话请完整提供上下文
 * @param refreshToken 用于刷新access_token的refresh_token
- * @param assistantId 智能体ID，默认使用GLM4原版
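+ * @param model model name, or a 24+ character lowercase alphanumeric agent ID; names containing "think" or "zero" select the Zero reasoning agent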
 * @param retryCount 重试次数
 */
 async function createCompletion(
  messages: any[],
  refreshToken: string,
-  assistantId = DEFAULT_ASSISTANT_ID,
+  model = MODEL_NAME,
  refConvId = "",
  retryCount = 0
 ) {
@ -189,6 +191,13 @@ async function createCompletion(
    // 如果引用对话ID不正确则重置引用
    if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";

+    let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
+
+    if(model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
+      assistantId = ZERO_ASSISTANT_ID;
+      logger.info('使用思考模型');
+    }
+
    // 请求流
    const token = await acquireToken(refreshToken);
    const result = await axios.post(
@ -200,8 +209,11 @@ async function createCompletion(
        meta_data: {
          channel: "",
          draft_id: "",
+          if_plus_model: true,
          input_question_type: "xxxx",
          is_test: false,
+          platform: "pc",
+          quote_log_id: ""
        },
      },
      {
@ -231,7 +243,7 @@ async function createCompletion(

    const streamStartTime = util.timestamp();
    // 接收流为输出文本
-    const answer = await receiveStream(result.data);
+    const answer = await receiveStream(model, result.data);
    logger.success(
      `Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
    );
@ -251,7 +263,7 @@ async function createCompletion(
        return createCompletion(
          messages,
          refreshToken,
-          assistantId,
+          model,
          refConvId,
          retryCount + 1
        );
@ -266,13 +278,13 @@ async function createCompletion(
 *
 * @param messages 参考gpt系列消息格式，多轮对话请完整提供上下文
 * @param refreshToken 用于刷新access_token的refresh_token
- * @param assistantId 智能体ID，默认使用GLM4原版
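+ * @param model model name, or a 24+ character lowercase alphanumeric agent ID; names containing "think" or "zero" select the Zero reasoning agent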
 * @param retryCount 重试次数
 */
 async function createCompletionStream(
  messages: any[],
  refreshToken: string,
-  assistantId = DEFAULT_ASSISTANT_ID,
+  model = MODEL_NAME,
  refConvId = "",
  retryCount = 0
 ) {
@ -290,6 +302,13 @@ async function createCompletionStream(
    // 如果引用对话ID不正确则重置引用
    if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";

+    let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
+
+    if(model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
+      assistantId = ZERO_ASSISTANT_ID;
+      logger.info('使用思考模型');
+    }
+
    // 请求流
    const token = await acquireToken(refreshToken);
    const result = await axios.post(
@ -301,8 +320,11 @@ async function createCompletionStream(
        meta_data: {
          channel: "",
          draft_id: "",
+          if_plus_model: true,
          input_question_type: "xxxx",
          is_test: false,
+          platform: "pc",
+          quote_log_id: ""
        },
      },
      {
@ -354,7 +376,7 @@ async function createCompletionStream(

    const streamStartTime = util.timestamp();
    // 创建转换流将消息格式转换为gpt兼容格式
-    return createTransStream(result.data, (convId: string) => {
+    return createTransStream(model, result.data, (convId: string) => {
      logger.success(
        `Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
      );
@ -372,7 +394,7 @@ async function createCompletionStream(
        return createCompletionStream(
          messages,
          refreshToken,
-          assistantId,
+          model,
          refConvId,
          retryCount + 1
        );
@ -407,8 +429,11 @@ async function generateImages(
        meta_data: {
          channel: "",
          draft_id: "",
+          if_plus_model: true,
          input_question_type: "xxxx",
          is_test: false,
+          platform: "pc",
+          quote_log_id: ""
        },
      },
      {
@ -904,14 +929,15 @@ function checkResult(result: AxiosResponse, refreshToken: string) {
 /**
 * 从流接收完整的消息内容
 *
+ * @param model 模型
 * @param stream 消息流
 */
-async function receiveStream(stream: any): Promise<any> {
+async function receiveStream(model: string, stream: any): Promise<any> {
  return new Promise((resolve, reject) => {
    // 消息初始化
    const data = {
      id: "",
-      model: MODEL_NAME,
+      model,
      object: "chat.completion",
      choices: [
        {
@ -923,6 +949,8 @@ async function receiveStream(stream: any): Promise<any> {
      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
      created: util.unixTimestamp(),
    };
+    const isSilentModel = model.indexOf('silent') != -1;
+    let thinkingText = "";
    let toolCall = false;
    let codeGenerating = false;
    let textChunkLength = 0;
@ -930,6 +958,7 @@ async function receiveStream(stream: any): Promise<any> {
    let lastExecutionOutput = "";
    let textOffset = 0;
    let refContent = "";
+    logger.info(`是否静默模型: ${isSilentModel}`);
    const parser = createParser((event) => {
      try {
        if (event.type !== "event") return;
@ -957,6 +986,7 @@ async function receiveStream(stream: any): Promise<any> {
                textChunkLength = 0;
                innerStr += "\n";
              }
+
              if (type == "text") {
                if (toolCall) {
                  innerStr += "\n";
@ -965,11 +995,20 @@ async function receiveStream(stream: any): Promise<any> {
                }
                if (partStatus == "finish") textChunkLength = text.length;
                return innerStr + text;
-              } else if (
+              } else if (type == "text_thinking" && !isSilentModel) {
+                if (toolCall) {
+                  innerStr += "\n";
+                  textOffset++;
+                  toolCall = false;
+                }
+                thinkingText = text;
+                return innerStr;
+              }else if (
                type == "quote_result" &&
                status == "finish" &&
                meta_data &&
-                _.isArray(meta_data.metadata_list)
+                _.isArray(meta_data.metadata_list) &&
+                !isSilentModel
              ) {
                refContent = meta_data.metadata_list.reduce((meta, v) => {
                  return meta + `${v.title} - ${v.url}\n`;
@ -1032,6 +1071,8 @@ async function receiveStream(stream: any): Promise<any> {
          );
          data.choices[0].message.content += chunk;
        } else {
+          if(thinkingText)
+            data.choices[0].message.content = `[思考开始]\n${thinkingText}[思考结束]\n\n${data.choices[0].message.content}`;
          data.choices[0].message.content =
            data.choices[0].message.content.replace(
              /【\d+†(来源|源|source)】/g,
@ -1059,18 +1100,22 @@ async function receiveStream(stream: any): Promise<any> {
 *
 * 将流格式转换为gpt兼容流格式
 *
+ * @param model 模型
 * @param stream 消息流
 * @param endCallback 传输结束回调
 */
-function createTransStream(stream: any, endCallback?: Function) {
+function createTransStream(model: string, stream: any, endCallback?: Function) {
  // 消息创建时间
  const created = util.unixTimestamp();
  // 创建转换流
  const transStream = new PassThrough();
+  const isSilentModel = model.indexOf('silent') != -1;
  let content = "";
+  let thinking = false;
  let toolCall = false;
  let codeGenerating = false;
  let textChunkLength = 0;
+  let thinkingText = "";
  let codeTemp = "";
  let lastExecutionOutput = "";
  let textOffset = 0;
@ -1078,7 +1123,7 @@ function createTransStream(stream: any, endCallback?: Function) {
    transStream.write(
      `data: ${JSON.stringify({
        id: "",
-        model: MODEL_NAME,
+        model,
        object: "chat.completion.chunk",
        choices: [
          {
@ -1116,6 +1161,11 @@ function createTransStream(stream: any, endCallback?: Function) {
              innerStr += "\n";
            }
            if (type == "text") {
+              if(thinking) {
+                innerStr += "[思考结束]\n\n"
+                textOffset = thinkingText.length + 8;
+                thinking = false;
+              }
              if (toolCall) {
                innerStr += "\n";
                textOffset++;
@ -1123,11 +1173,26 @@ function createTransStream(stream: any, endCallback?: Function) {
              }
              if (partStatus == "finish") textChunkLength = text.length;
              return innerStr + text;
+            } else if (type == "text_thinking" && !isSilentModel) {
+              if(!thinking) {
+                innerStr += "[思考开始]\n";
+                textOffset = 7;
+                thinking = true;
+              }
+              if (toolCall) {
+                innerStr += "\n";
+                textOffset++;
+                toolCall = false;
+              }
+              if (partStatus == "finish") textChunkLength = text.length;
+              thinkingText += text.substring(thinkingText.length, text.length);
+              return innerStr + text;
            } else if (
              type == "quote_result" &&
              status == "finish" &&
              meta_data &&
-              _.isArray(meta_data.metadata_list)
+              _.isArray(meta_data.metadata_list) &&
+              !isSilentModel
            ) {
              const searchText =
                meta_data.metadata_list.reduce(
--- a/src/api/routes/chat.ts
+++ b/src/api/routes/chat.ts
@ -5,6 +5,9 @@ import Response from '@/lib/response/Response.ts';
 import chat from '@/api/controllers/chat.ts';
 import logger from '@/lib/logger.ts';

+// zero推理模型智能体ID
+const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
+
 export default {

    prefix: '/v1/chat',
@ -21,15 +24,15 @@ export default {
            // 随机挑选一个refresh_token
            const token = _.sample(tokens);
            const { model, conversation_id: convId, messages, stream } = request.body;
-            const assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined
+
            if (stream) {
-                const stream = await chat.createCompletionStream(messages, token, assistantId, convId);
+                const stream = await chat.createCompletionStream(messages, token, model, convId);
                return new Response(stream, {
                    type: "text/event-stream"
                });
            }
            else
-                return await chat.createCompletion(messages, token, assistantId, convId);
+                return await chat.createCompletion(messages, token, model, convId);
        }

    }
--- a/src/api/routes/models.ts
+++ b/src/api/routes/models.ts
@ -18,6 +18,11 @@ export default {
                        "object": "model",
                        "owned_by": "glm-free-api"
                    },
+                    {
+                        "id": "glm-4-plus",
+                        "object": "model",
+                        "owned_by": "glm-free-api"
+                    },
                    {
                        "id": "glm-4v",
                        "object": "model",