支持GLM-4-Plus以及Zero思考推理模型

This commit is contained in:
Vinlic 2024-12-31 11:32:25 +08:00
parent 57b042d187
commit 719e3b682f
6 changed files with 107 additions and 30 deletions

View File

@ -9,7 +9,7 @@
![](https://img.shields.io/github/forks/llm-red-team/glm-free-api.svg)
![](https://img.shields.io/docker/pulls/vinlic/glm-free-api.svg)
支持高速流式输出、支持多轮对话、支持智能体对话、支持视频生成、支持AI绘图、支持联网搜索、支持长文档解读、支持图像解析零配置部署多路token支持自动清理会话痕迹。
支持GLM-4-Plus高速流式输出、支持多轮对话、支持智能体对话、支持Zero思考推理模型、支持视频生成、支持AI绘图、支持联网搜索、支持长文档解读、支持图像解析，零配置部署，多路token支持，自动清理会话痕迹。
与ChatGPT接口完全兼容。
@ -286,8 +286,10 @@ Authorization: Bearer [refresh_token]
请求数据:
```json
{
// 如果使用智能体请填写智能体ID到此处否则可以乱填
"model": "glm4",
// 默认模型：glm-4-plus
// zero思考推理模型：glm-4-zero / glm-4-think
// 如果使用智能体，请填写智能体ID到此处
"model": "glm-4-plus",
// 目前多轮对话基于消息合并实现，某些场景可能导致能力下降，且受单轮最大token数限制
// 如果您想获得原生的多轮对话体验，可以传入首轮消息获得的id，来接续上下文
// "conversation_id": "65f6c28546bae1f0fbb532de",
@ -307,7 +309,7 @@ Authorization: Bearer [refresh_token]
{
// 如果想获得原生多轮对话体验，此id你可以传入到下一轮对话的conversation_id来接续上下文
"id": "65f6c28546bae1f0fbb532de",
"model": "glm4",
"model": "glm-4",
"object": "chat.completion",
"choices": [
{
@ -432,7 +434,7 @@ Authorization: Bearer [refresh_token]
```json
{
// 如果使用智能体请填写智能体ID到此处，否则可以乱填
"model": "glm4",
"model": "glm-4",
"messages": [
{
"role": "user",
@ -459,7 +461,7 @@ Authorization: Bearer [refresh_token]
```json
{
"id": "cnmuo7mcp7f9hjcmihn0",
"model": "glm4",
"model": "glm-4",
"object": "chat.completion",
"choices": [
{

View File

@ -289,8 +289,10 @@ Authorization: Bearer [refresh_token]
Request data:
```json
{
// Except using the Agent to fill the ID, fill in the model name as you like.
"model": "glm4",
// Default model: glm-4-plus
// zero thinking model: glm-4-zero / glm-4-think
// If using the Agent, fill in the Agent ID here
"model": "glm-4",
// Currently, multi-round conversations are implemented by merging messages, which in some scenarios may degrade capability and is limited by the maximum number of tokens in a single round.
// If you want a native multi-round conversation experience, you can pass in the id returned by a previous round to continue the context
// "conversation_id": "65f6c28546bae1f0fbb532de",
@ -309,7 +311,7 @@ Response data
```json
{
"id": "65f6c28546bae1f0fbb532de",
"model": "glm4",
"model": "glm-4",
"object": "chat.completion",
"choices": [
{
@ -434,7 +436,7 @@ Request data:
```json
{
// If using an Agent, fill in the Agent ID here; otherwise any value can be used
"model": "glm4",
"model": "glm-4",
"messages": [
{
"role": "user",
@ -461,7 +463,7 @@ Response data:
```json
{
"id": "cnmuo7mcp7f9hjcmihn0",
"model": "glm4",
"model": "glm-4",
"object": "chat.completion",
"choices": [
{

View File

@ -1,6 +1,6 @@
{
"name": "glm-free-api",
"version": "0.0.33",
"version": "0.0.34",
"description": "GLM Free API Server",
"type": "module",
"main": "dist/index.js",

View File

@ -17,6 +17,8 @@ import util from "@/lib/util.ts";
// Model name; used as the default `model` argument of the completion functions
const MODEL_NAME = "glm";
// Default agent (assistant) ID: GLM4
const DEFAULT_ASSISTANT_ID = "65940acff94777010aa6b796";
// Agent ID of the zero reasoning model; selected when the requested model name contains "think" or "zero"
const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
// Validity period of the access_token (presumably in seconds — TODO confirm)
const ACCESS_TOKEN_EXPIRES = 3600;
// 最大重试次数
@ -165,13 +167,13 @@ async function removeConversation(
*
* @param messages gpt系列消息格式
* @param refreshToken access_token的refresh_token
* @param assistantId ID使GLM4原版
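* @param model model name or agent ID; a value of 24+ lowercase alphanumerics is used as the agent ID, and a name containing "think" or "zero" selects the zero reasoning agent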
* @param retryCount
*/
async function createCompletion(
messages: any[],
refreshToken: string,
assistantId = DEFAULT_ASSISTANT_ID,
model = MODEL_NAME,
refConvId = "",
retryCount = 0
) {
@ -189,6 +191,13 @@ async function createCompletion(
// 如果引用对话ID不正确则重置引用
if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";
let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
if(model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
assistantId = ZERO_ASSISTANT_ID;
logger.info('使用思考模型');
}
// 请求流
const token = await acquireToken(refreshToken);
const result = await axios.post(
@ -200,8 +209,11 @@ async function createCompletion(
meta_data: {
channel: "",
draft_id: "",
if_plus_model: true,
input_question_type: "xxxx",
is_test: false,
platform: "pc",
quote_log_id: ""
},
},
{
@ -231,7 +243,7 @@ async function createCompletion(
const streamStartTime = util.timestamp();
// 接收流为输出文本
const answer = await receiveStream(result.data);
const answer = await receiveStream(model, result.data);
logger.success(
`Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
);
@ -251,7 +263,7 @@ async function createCompletion(
return createCompletion(
messages,
refreshToken,
assistantId,
model,
refConvId,
retryCount + 1
);
@ -266,13 +278,13 @@ async function createCompletion(
*
* @param messages gpt系列消息格式
* @param refreshToken access_token的refresh_token
* @param assistantId ID使GLM4原版
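* @param model model name or agent ID; a value of 24+ lowercase alphanumerics is used as the agent ID, and a name containing "think" or "zero" selects the zero reasoning agent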
* @param retryCount
*/
async function createCompletionStream(
messages: any[],
refreshToken: string,
assistantId = DEFAULT_ASSISTANT_ID,
model = MODEL_NAME,
refConvId = "",
retryCount = 0
) {
@ -290,6 +302,13 @@ async function createCompletionStream(
// 如果引用对话ID不正确则重置引用
if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";
let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
if(model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
assistantId = ZERO_ASSISTANT_ID;
logger.info('使用思考模型');
}
// 请求流
const token = await acquireToken(refreshToken);
const result = await axios.post(
@ -301,8 +320,11 @@ async function createCompletionStream(
meta_data: {
channel: "",
draft_id: "",
if_plus_model: true,
input_question_type: "xxxx",
is_test: false,
platform: "pc",
quote_log_id: ""
},
},
{
@ -354,7 +376,7 @@ async function createCompletionStream(
const streamStartTime = util.timestamp();
// 创建转换流将消息格式转换为gpt兼容格式
return createTransStream(result.data, (convId: string) => {
return createTransStream(model, result.data, (convId: string) => {
logger.success(
`Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
);
@ -372,7 +394,7 @@ async function createCompletionStream(
return createCompletionStream(
messages,
refreshToken,
assistantId,
model,
refConvId,
retryCount + 1
);
@ -407,8 +429,11 @@ async function generateImages(
meta_data: {
channel: "",
draft_id: "",
if_plus_model: true,
input_question_type: "xxxx",
is_test: false,
platform: "pc",
quote_log_id: ""
},
},
{
@ -904,14 +929,15 @@ function checkResult(result: AxiosResponse, refreshToken: string) {
/**
*
*
* @param model
* @param stream
*/
async function receiveStream(stream: any): Promise<any> {
async function receiveStream(model: string, stream: any): Promise<any> {
return new Promise((resolve, reject) => {
// 消息初始化
const data = {
id: "",
model: MODEL_NAME,
model,
object: "chat.completion",
choices: [
{
@ -923,6 +949,8 @@ async function receiveStream(stream: any): Promise<any> {
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
created: util.unixTimestamp(),
};
const isSilentModel = model.indexOf('silent') != -1;
let thinkingText = "";
let toolCall = false;
let codeGenerating = false;
let textChunkLength = 0;
@ -930,6 +958,7 @@ async function receiveStream(stream: any): Promise<any> {
let lastExecutionOutput = "";
let textOffset = 0;
let refContent = "";
logger.info(`是否静默模型: ${isSilentModel}`);
const parser = createParser((event) => {
try {
if (event.type !== "event") return;
@ -957,6 +986,7 @@ async function receiveStream(stream: any): Promise<any> {
textChunkLength = 0;
innerStr += "\n";
}
if (type == "text") {
if (toolCall) {
innerStr += "\n";
@ -965,11 +995,20 @@ async function receiveStream(stream: any): Promise<any> {
}
if (partStatus == "finish") textChunkLength = text.length;
return innerStr + text;
} else if (
} else if (type == "text_thinking" && !isSilentModel) {
if (toolCall) {
innerStr += "\n";
textOffset++;
toolCall = false;
}
thinkingText = text;
return innerStr;
}else if (
type == "quote_result" &&
status == "finish" &&
meta_data &&
_.isArray(meta_data.metadata_list)
_.isArray(meta_data.metadata_list) &&
!isSilentModel
) {
refContent = meta_data.metadata_list.reduce((meta, v) => {
return meta + `${v.title} - ${v.url}\n`;
@ -1032,6 +1071,8 @@ async function receiveStream(stream: any): Promise<any> {
);
data.choices[0].message.content += chunk;
} else {
if(thinkingText)
data.choices[0].message.content = `[思考开始]\n${thinkingText}[思考结束]\n\n${data.choices[0].message.content}`;
data.choices[0].message.content =
data.choices[0].message.content.replace(
/【\d+†(来源|源|source)】/g,
@ -1059,18 +1100,22 @@ async function receiveStream(stream: any): Promise<any> {
*
* gpt兼容流格式
*
* @param model
* @param stream
* @param endCallback
*/
function createTransStream(stream: any, endCallback?: Function) {
function createTransStream(model: string, stream: any, endCallback?: Function) {
// 消息创建时间
const created = util.unixTimestamp();
// 创建转换流
const transStream = new PassThrough();
const isSilentModel = model.indexOf('silent') != -1;
let content = "";
let thinking = false;
let toolCall = false;
let codeGenerating = false;
let textChunkLength = 0;
let thinkingText = "";
let codeTemp = "";
let lastExecutionOutput = "";
let textOffset = 0;
@ -1078,7 +1123,7 @@ function createTransStream(stream: any, endCallback?: Function) {
transStream.write(
`data: ${JSON.stringify({
id: "",
model: MODEL_NAME,
model,
object: "chat.completion.chunk",
choices: [
{
@ -1116,6 +1161,11 @@ function createTransStream(stream: any, endCallback?: Function) {
innerStr += "\n";
}
if (type == "text") {
if(thinking) {
innerStr += "[思考结束]\n\n"
textOffset = thinkingText.length + 8;
thinking = false;
}
if (toolCall) {
innerStr += "\n";
textOffset++;
@ -1123,11 +1173,26 @@ function createTransStream(stream: any, endCallback?: Function) {
}
if (partStatus == "finish") textChunkLength = text.length;
return innerStr + text;
} else if (type == "text_thinking" && !isSilentModel) {
if(!thinking) {
innerStr += "[思考开始]\n";
textOffset = 7;
thinking = true;
}
if (toolCall) {
innerStr += "\n";
textOffset++;
toolCall = false;
}
if (partStatus == "finish") textChunkLength = text.length;
thinkingText += text.substring(thinkingText.length, text.length);
return innerStr + text;
} else if (
type == "quote_result" &&
status == "finish" &&
meta_data &&
_.isArray(meta_data.metadata_list)
_.isArray(meta_data.metadata_list) &&
!isSilentModel
) {
const searchText =
meta_data.metadata_list.reduce(

View File

@ -5,6 +5,9 @@ import Response from '@/lib/response/Response.ts';
import chat from '@/api/controllers/chat.ts';
import logger from '@/lib/logger.ts';
// Agent ID of the zero reasoning model.
// NOTE(review): no use of this constant is visible in this file's hunks; the model-to-agent
// mapping appears to happen inside the chat controller — confirm whether it is still needed here.
const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
export default {
prefix: '/v1/chat',
@ -21,15 +24,15 @@ export default {
// 随机挑选一个refresh_token
const token = _.sample(tokens);
const { model, conversation_id: convId, messages, stream } = request.body;
const assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined
if (stream) {
const stream = await chat.createCompletionStream(messages, token, assistantId, convId);
const stream = await chat.createCompletionStream(messages, token, model, convId);
return new Response(stream, {
type: "text/event-stream"
});
}
else
return await chat.createCompletion(messages, token, assistantId, convId);
return await chat.createCompletion(messages, token, model, convId);
}
}

View File

@ -18,6 +18,11 @@ export default {
"object": "model",
"owned_by": "glm-free-api"
},
{
"id": "glm-4-plus",
"object": "model",
"owned_by": "glm-free-api"
},
{
"id": "glm-4v",
"object": "model",