diff --git a/README.md b/README.md
index 764ce03..aa13623 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 ![](https://img.shields.io/github/forks/llm-red-team/glm-free-api.svg)
 ![](https://img.shields.io/docker/pulls/vinlic/glm-free-api.svg)
 
-Supports high-speed streaming output, multi-turn dialogue, agent dialogue, video generation, AI drawing, web search, long-document interpretation, and image parsing. Zero-configuration deployment, multi-token support, automatic cleanup of conversation traces.
+Supports GLM-4-Plus high-speed streaming output, multi-turn dialogue, agent dialogue, the Zero thinking/reasoning model, video generation, AI drawing, web search, long-document interpretation, and image parsing. Zero-configuration deployment, multi-token support, automatic cleanup of conversation traces.
 
 Fully compatible with the ChatGPT API.
 
@@ -286,8 +286,10 @@ Authorization: Bearer [refresh_token]
 Request data:
 ```json
 {
-    // If using an agent, fill in the agent ID here; otherwise any value works
-    "model": "glm4",
+    // Default model: glm-4-plus
+    // Zero thinking/reasoning models: glm-4-zero / glm-4-think
+    // If using an agent, fill in the agent ID here
+    "model": "glm-4-plus",
     // Multi-turn dialogue is currently implemented via message merging; in some scenarios this may degrade capability, and it is limited by the maximum token count of a single turn
     // For a native multi-turn dialogue experience, pass the id returned by the first-turn message to continue the context
     // "conversation_id": "65f6c28546bae1f0fbb532de",
@@ -307,7 +309,7 @@ Authorization: Bearer [refresh_token]
 {
     // For a native multi-turn dialogue experience, pass this id as conversation_id in the next turn to continue the context
     "id": "65f6c28546bae1f0fbb532de",
-    "model": "glm4",
+    "model": "glm-4",
     "object": "chat.completion",
     "choices": [
         {
@@ -432,7 +434,7 @@
 ```json
 {
     // If using an agent, fill in the agent ID here; otherwise any value works
-    "model": "glm4",
+    "model": "glm-4",
     "messages": [
         {
             "role": "user",
@@ -459,7 +461,7 @@
 ```json
 {
     "id": "cnmuo7mcp7f9hjcmihn0",
-    "model": "glm4",
+    "model": "glm-4",
     "object": "chat.completion",
     "choices": [
         {
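For illustration, a request exercising the new model names could look like the sketch below. This is not part of the patch; the base URL and port are assumptions for a typical local deployment of this service, and `[refresh_token]` is a placeholder as in the README.

```ts
// Sketch only: call the ChatGPT-compatible endpoint with the new Zero reasoning model.
// Assumes the service runs locally on port 8000; adjust to your deployment.
const res = await fetch("http://127.0.0.1:8000/v1/chat/completions", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Bearer [refresh_token]", // placeholder token
  },
  body: JSON.stringify({
    model: "glm-4-zero", // or "glm-4-plus" / "glm-4-think" / an agent ID
    messages: [{ role: "user", content: "Hello" }],
  }),
});
console.log((await res.json()).choices[0].message.content);
```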
diff --git a/README_EN.md b/README_EN.md
index 2f5b6e9..ab5fd04 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -289,8 +289,10 @@ Authorization: Bearer [refresh_token]
 Request data:
 ```json
 {
-    // Except using the Agent to fill the ID, fill in the model name as you like.
-    "model": "glm4",
+    // Default model: glm-4-plus
+    // Zero thinking/reasoning models: glm-4-zero / glm-4-think
+    // If using the Agent, fill in the Agent ID here
+    "model": "glm-4-plus",
     // Currently, multi-round conversations are implemented via message merging, which in some scenarios may degrade capability and is limited by the maximum number of tokens in a single round.
     // If you want a native multi-round dialogue experience, you can pass in the id obtained from the previous round of messages to pick up the context
     // "conversation_id": "65f6c28546bae1f0fbb532de",
@@ -309,7 +311,7 @@ Response data:
 ```json
 {
     "id": "65f6c28546bae1f0fbb532de",
-    "model": "glm4",
+    "model": "glm-4",
     "object": "chat.completion",
     "choices": [
         {
@@ -434,7 +436,7 @@ Request data:
 ```json
 {
     // If using the Agent, fill in the Agent ID here; otherwise any value works
-    "model": "glm4",
+    "model": "glm-4",
     "messages": [
         {
             "role": "user",
@@ -461,7 +463,7 @@ Response data:
 ```json
 {
     "id": "cnmuo7mcp7f9hjcmihn0",
-    "model": "glm4",
+    "model": "glm-4",
     "object": "chat.completion",
     "choices": [
         {
diff --git a/package.json b/package.json
index fc0170b..cfd1e10 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "glm-free-api",
-  "version": "0.0.33",
+  "version": "0.0.34",
   "description": "GLM Free API Server",
   "type": "module",
   "main": "dist/index.js",
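The controller changes that follow route the incoming `model` string to an agent ID: a lowercase alphanumeric value of 24+ characters is treated as an agent ID itself, and any model name containing `zero` or `think` is redirected to the Zero reasoning agent. A condensed, standalone sketch of that rule (the constant is copied from the patch; the helper name `resolveAssistantId` is mine, for illustration):

```ts
// Agent ID of the Zero reasoning model (from the patch).
const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";

// Mirrors the routing added to createCompletion/createCompletionStream:
// returns the agent ID to send upstream, or undefined for the default model.
function resolveAssistantId(model: string): string | undefined {
  let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
  if (model.indexOf("think") != -1 || model.indexOf("zero") != -1)
    assistantId = ZERO_ASSISTANT_ID;
  return assistantId;
}

// resolveAssistantId("glm-4-zero")              -> ZERO_ASSISTANT_ID
// resolveAssistantId("65940acff94777010aa6b796") -> "65940acff94777010aa6b796"
// resolveAssistantId("glm-4-plus")              -> undefined (default model)
```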
diff --git a/src/api/controllers/chat.ts b/src/api/controllers/chat.ts
index 32bb807..7a3dc81 100644
--- a/src/api/controllers/chat.ts
+++ b/src/api/controllers/chat.ts
@@ -17,6 +17,8 @@ import util from "@/lib/util.ts";
 const MODEL_NAME = "glm";
 // Default agent ID (GLM4)
 const DEFAULT_ASSISTANT_ID = "65940acff94777010aa6b796";
+// Agent ID of the Zero reasoning model
+const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
 // access_token validity period
 const ACCESS_TOKEN_EXPIRES = 3600;
 // Maximum retry count
@@ -165,13 +167,13 @@ async function removeConversation(
  *
  * @param messages GPT-series message format; provide the full context for multi-turn dialogue
  * @param refreshToken refresh_token used to refresh the access_token
- * @param assistantId Agent ID; defaults to the original GLM4
+ * @param model Model name or agent ID; defaults to the original GLM4
  * @param retryCount Retry count
  */
 async function createCompletion(
   messages: any[],
   refreshToken: string,
-  assistantId = DEFAULT_ASSISTANT_ID,
+  model = MODEL_NAME,
   refConvId = "",
   retryCount = 0
 ) {
@@ -189,6 +191,13 @@
     // Reset the reference if the referenced conversation ID is invalid
     if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";
 
+    let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
+
+    if (model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
+      assistantId = ZERO_ASSISTANT_ID;
+      logger.info('Using thinking model');
+    }
+
     // Request the stream
     const token = await acquireToken(refreshToken);
     const result = await axios.post(
@@ -200,8 +209,11 @@
         meta_data: {
           channel: "",
           draft_id: "",
+          if_plus_model: true,
           input_question_type: "xxxx",
           is_test: false,
+          platform: "pc",
+          quote_log_id: ""
         },
       },
       {
@@ -231,7 +243,7 @@
     const streamStartTime = util.timestamp();
     // Receive the stream as output text
-    const answer = await receiveStream(result.data);
+    const answer = await receiveStream(model, result.data);
     logger.success(
       `Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
     );
@@ -251,7 +263,7 @@
       return createCompletion(
         messages,
         refreshToken,
-        assistantId,
+        model,
         refConvId,
         retryCount + 1
       );
@@ -266,13 +278,13 @@
  *
  * @param messages GPT-series message format; provide the full context for multi-turn dialogue
  * @param refreshToken refresh_token used to refresh the access_token
- * @param assistantId Agent ID; defaults to the original GLM4
+ * @param model Model name or agent ID; defaults to the original GLM4
  * @param retryCount Retry count
  */
 async function createCompletionStream(
   messages: any[],
   refreshToken: string,
-  assistantId = DEFAULT_ASSISTANT_ID,
+  model = MODEL_NAME,
   refConvId = "",
   retryCount = 0
 ) {
@@ -290,6 +302,13 @@
     // Reset the reference if the referenced conversation ID is invalid
     if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";
 
+    let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
+
+    if (model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
+      assistantId = ZERO_ASSISTANT_ID;
+      logger.info('Using thinking model');
+    }
+
     // Request the stream
     const token = await acquireToken(refreshToken);
     const result = await axios.post(
@@ -301,8 +320,11 @@
         meta_data: {
           channel: "",
           draft_id: "",
+          if_plus_model: true,
           input_question_type: "xxxx",
           is_test: false,
+          platform: "pc",
+          quote_log_id: ""
         },
       },
       {
@@ -354,7 +376,7 @@
     const streamStartTime = util.timestamp();
     // Create a transform stream that converts the message format into a GPT-compatible one
-    return createTransStream(result.data, (convId: string) => {
+    return createTransStream(model, result.data, (convId: string) => {
       logger.success(
         `Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
       );
@@ -372,7 +394,7 @@
       return createCompletionStream(
         messages,
         refreshToken,
-        assistantId,
+        model,
         refConvId,
         retryCount + 1
       );
@@ -407,8 +429,11 @@ async function generateImages(
         meta_data: {
           channel: "",
           draft_id: "",
+          if_plus_model: true,
           input_question_type: "xxxx",
           is_test: false,
+          platform: "pc",
+          quote_log_id: ""
         },
       },
       {
@@ -904,14 +929,15 @@ function checkResult(result: AxiosResponse, refreshToken: string) {
 /**
  * Receive the complete message content from the stream
  *
+ * @param model Model name
  * @param stream Message stream
  */
-async function receiveStream(stream: any): Promise<any> {
+async function receiveStream(model: string, stream: any): Promise<any> {
   return new Promise((resolve, reject) => {
     // Initialize the message
     const data = {
       id: "",
-      model: MODEL_NAME,
+      model,
       object: "chat.completion",
       choices: [
         {
@@ -923,6 +949,8 @@
       usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
       created: util.unixTimestamp(),
     };
+    const isSilentModel = model.indexOf('silent') != -1;
+    let thinkingText = "";
     let toolCall = false;
     let codeGenerating = false;
     let textChunkLength = 0;
@@ -930,6 +958,7 @@
     let lastExecutionOutput = "";
     let textOffset = 0;
     let refContent = "";
+    logger.info(`Silent model: ${isSilentModel}`);
     const parser = createParser((event) => {
       try {
         if (event.type !== "event") return;
@@ -957,6 +986,7 @@
           textChunkLength = 0;
           innerStr += "\n";
         }
+
         if (type == "text") {
           if (toolCall) {
             innerStr += "\n";
             textOffset++;
             toolCall = false;
           }
           if (partStatus == "finish") textChunkLength = text.length;
           return innerStr + text;
-        } else if (
+        } else if (type == "text_thinking" && !isSilentModel) {
+          if (toolCall) {
+            innerStr += "\n";
+            textOffset++;
+            toolCall = false;
+          }
+          thinkingText = text;
+          return innerStr;
+        } else if (
           type == "quote_result" &&
           status == "finish" &&
           meta_data &&
-          _.isArray(meta_data.metadata_list)
+          _.isArray(meta_data.metadata_list) &&
+          !isSilentModel
         ) {
           refContent = meta_data.metadata_list.reduce((meta, v) => {
             return meta + `${v.title} - ${v.url}\n`;
           }, "");
@@ -1032,6 +1071,8 @@
           );
           data.choices[0].message.content += chunk;
         } else {
+          if (thinkingText)
+            data.choices[0].message.content = `[思考开始]\n${thinkingText}[思考结束]\n\n${data.choices[0].message.content}`;
           data.choices[0].message.content =
             data.choices[0].message.content.replace(
               /【\d+†(来源|源|source)】/g,
@@ -1059,18 +1100,22 @@
 /**
  * Convert the stream format into a GPT-compatible stream format
  *
+ * @param model Model name
  * @param stream Message stream
  * @param endCallback Callback invoked when the transfer ends
  */
-function createTransStream(stream: any, endCallback?: Function) {
+function createTransStream(model: string, stream: any, endCallback?: Function) {
   // Message creation time
   const created = util.unixTimestamp();
   // Create the transform stream
   const transStream = new PassThrough();
+  const isSilentModel = model.indexOf('silent') != -1;
   let content = "";
+  let thinking = false;
   let toolCall = false;
   let codeGenerating = false;
   let textChunkLength = 0;
+  let thinkingText = "";
   let codeTemp = "";
   let lastExecutionOutput = "";
   let textOffset = 0;
@@ -1078,7 +1123,7 @@ function createTransStream(stream: any, endCallback?: Function) {
   transStream.write(
     `data: ${JSON.stringify({
       id: "",
-      model: MODEL_NAME,
+      model,
       object: "chat.completion.chunk",
       choices: [
         {
@@ -1116,6 +1161,11 @@
           innerStr += "\n";
         }
         if (type == "text") {
+          if (thinking) {
+            innerStr += "[思考结束]\n\n";
+            textOffset = thinkingText.length + 8; // thinking text plus "[思考结束]\n\n"
+            thinking = false;
+          }
           if (toolCall) {
             innerStr += "\n";
             textOffset++;
             toolCall = false;
           }
           if (partStatus == "finish") textChunkLength = text.length;
           return innerStr + text;
+        } else if (type == "text_thinking" && !isSilentModel) {
+          if (!thinking) {
+            innerStr += "[思考开始]\n";
+            textOffset = 7; // length of "[思考开始]\n"
+            thinking = true;
+          }
+          if (toolCall) {
+            innerStr += "\n";
+            textOffset++;
+            toolCall = false;
+          }
+          if (partStatus == "finish") textChunkLength = text.length;
+          thinkingText += text.substring(thinkingText.length, text.length);
+          return innerStr + text;
         } else if (
           type == "quote_result" &&
           status == "finish" &&
           meta_data &&
-          _.isArray(meta_data.metadata_list)
+          _.isArray(meta_data.metadata_list) &&
+          !isSilentModel
         ) {
           const searchText = meta_data.metadata_list.reduce(
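As the `receiveStream` changes above show, for zero/think models the assembled `message.content` is prefixed with the thinking trace wrapped in `[思考开始]` / `[思考结束]` markers. A client that only wants the final answer could strip that prefix; a minimal sketch (the helper name is hypothetical, the marker format is taken from the patch):

```ts
// Strips the "[思考开始]\n...[思考结束]\n\n" prefix that zero/think models
// prepend to message.content, leaving only the final answer.
function stripThinking(content: string): string {
  return content.replace(/^\[思考开始\]\n[\s\S]*?\[思考结束\]\n\n/, "");
}
```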
diff --git a/src/api/routes/chat.ts b/src/api/routes/chat.ts
index 12782c0..88b1662 100644
--- a/src/api/routes/chat.ts
+++ b/src/api/routes/chat.ts
@@ -5,6 +5,9 @@ import Response from '@/lib/response/Response.ts';
 import chat from '@/api/controllers/chat.ts';
 import logger from '@/lib/logger.ts';
 
+// Agent ID of the Zero reasoning model
+const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
+
 export default {
 
     prefix: '/v1/chat',
@@ -21,15 +24,15 @@ export default {
             // Randomly pick a refresh_token
             const token = _.sample(tokens);
             const { model, conversation_id: convId, messages, stream } = request.body;
-            const assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined
+
             if (stream) {
-                const stream = await chat.createCompletionStream(messages, token, assistantId, convId);
+                const stream = await chat.createCompletionStream(messages, token, model, convId);
                 return new Response(stream, {
                     type: "text/event-stream"
                 });
             }
             else
-                return await chat.createCompletion(messages, token, assistantId, convId);
+                return await chat.createCompletion(messages, token, model, convId);
         }
     }
diff --git a/src/api/routes/models.ts b/src/api/routes/models.ts
index f776cb0..b013eb9 100644
--- a/src/api/routes/models.ts
+++ b/src/api/routes/models.ts
@@ -18,6 +18,11 @@ export default {
             "object": "model",
             "owned_by": "glm-free-api"
         },
+        {
+            "id": "glm-4-plus",
+            "object": "model",
+            "owned_by": "glm-free-api"
+        },
         {
             "id": "glm-4v",
             "object": "model",
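Note that the `silent` check in the controller is independent of the zero/think routing, so a model name containing both substrings selects the Zero agent while suppressing the thinking trace and `quote_result` citations. A hypothetical request body (the name `glm-4-zero-silent` is illustrative; only the substrings matter):

```ts
// "zero" routes to the Zero reasoning agent; "silent" sets isSilentModel = true,
// hiding the [思考开始]/[思考结束] trace and search citations in the output.
const requestBody = {
  model: "glm-4-zero-silent",
  messages: [{ role: "user", content: "Hello" }],
  stream: true,
};
```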