mirror of
https://github.com/LLM-Red-Team/glm-free-api.git
synced 2025-01-23 21:31:33 +08:00
支持GLM-4-Plus以及Zero思考推理模型
This commit is contained in:
parent
57b042d187
commit
719e3b682f
14
README.md
14
README.md
@ -9,7 +9,7 @@
|
||||
![](https://img.shields.io/github/forks/llm-red-team/glm-free-api.svg)
|
||||
![](https://img.shields.io/docker/pulls/vinlic/glm-free-api.svg)
|
||||
|
||||
支持高速流式输出、支持多轮对话、支持智能体对话、支持视频生成、支持AI绘图、支持联网搜索、支持长文档解读、支持图像解析,零配置部署,多路token支持,自动清理会话痕迹。
|
||||
支持GLM-4-Plus高速流式输出、支持多轮对话、支持智能体对话、支持Zero思考推理模型、支持视频生成、支持AI绘图、支持联网搜索、支持长文档解读、支持图像解析,零配置部署,多路token支持,自动清理会话痕迹。
|
||||
|
||||
与ChatGPT接口完全兼容。
|
||||
|
||||
@ -286,8 +286,10 @@ Authorization: Bearer [refresh_token]
|
||||
请求数据:
|
||||
```json
|
||||
{
|
||||
// 如果使用智能体请填写智能体ID到此处,否则可以乱填
|
||||
"model": "glm4",
|
||||
// 默认模型:glm-4-plus
|
||||
// zero思考推理模型:glm-4-zero / glm-4-think
|
||||
// 如果使用智能体请填写智能体ID到此处
|
||||
"model": "glm-4-plus",
|
||||
// 目前多轮对话基于消息合并实现,某些场景可能导致能力下降且受单轮最大token数限制
|
||||
// 如果您想获得原生的多轮对话体验,可以传入首轮消息获得的id,来接续上下文
|
||||
// "conversation_id": "65f6c28546bae1f0fbb532de",
|
||||
@ -307,7 +309,7 @@ Authorization: Bearer [refresh_token]
|
||||
{
|
||||
// 如果想获得原生多轮对话体验,此id,你可以传入到下一轮对话的conversation_id来接续上下文
|
||||
"id": "65f6c28546bae1f0fbb532de",
|
||||
"model": "glm4",
|
||||
"model": "glm-4",
|
||||
"object": "chat.completion",
|
||||
"choices": [
|
||||
{
|
||||
@ -432,7 +434,7 @@ Authorization: Bearer [refresh_token]
|
||||
```json
|
||||
{
|
||||
// 如果使用智能体请填写智能体ID到此处,否则可以乱填
|
||||
"model": "glm4",
|
||||
"model": "glm-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
@ -459,7 +461,7 @@ Authorization: Bearer [refresh_token]
|
||||
```json
|
||||
{
|
||||
"id": "cnmuo7mcp7f9hjcmihn0",
|
||||
"model": "glm4",
|
||||
"model": "glm-4",
|
||||
"object": "chat.completion",
|
||||
"choices": [
|
||||
{
|
||||
|
12
README_EN.md
12
README_EN.md
@ -289,8 +289,10 @@ Authorization: Bearer [refresh_token]
|
||||
Request data:
|
||||
```json
|
||||
{
|
||||
// Except using the Agent to fill the ID, fill in the model name as you like.
|
||||
"model": "glm4",
|
||||
// Default model: glm-4-plus
|
||||
// zero thinking model: glm-4-zero / glm-4-think
|
||||
// If using the Agent, fill in the Agent ID here
|
||||
"model": "glm-4",
|
||||
// Currently, multi-round conversations are implemented by merging messages, which in some scenarios may lead to capability degradation and is limited by the maximum number of tokens in a single round.
|
||||
// If you want a native multi-round dialog experience, you can pass in the id obtained from the first round of messages to continue the context
|
||||
// "conversation_id": "65f6c28546bae1f0fbb532de",
|
||||
@ -309,7 +311,7 @@ Response data:
|
||||
```json
|
||||
{
|
||||
"id": "65f6c28546bae1f0fbb532de",
|
||||
"model": "glm4",
|
||||
"model": "glm-4",
|
||||
"object": "chat.completion",
|
||||
"choices": [
|
||||
{
|
||||
@ -434,7 +436,7 @@ Request data:
|
||||
```json
|
||||
{
|
||||
// If using an Agent, fill in the Agent ID here; otherwise any value will do
|
||||
"model": "glm4",
|
||||
"model": "glm-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
@ -461,7 +463,7 @@ Response data:
|
||||
```json
|
||||
{
|
||||
"id": "cnmuo7mcp7f9hjcmihn0",
|
||||
"model": "glm4",
|
||||
"model": "glm-4",
|
||||
"object": "chat.completion",
|
||||
"choices": [
|
||||
{
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "glm-free-api",
|
||||
"version": "0.0.33",
|
||||
"version": "0.0.34",
|
||||
"description": "GLM Free API Server",
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
|
@ -17,6 +17,8 @@ import util from "@/lib/util.ts";
|
||||
const MODEL_NAME = "glm";
|
||||
// 默认的智能体ID,GLM4
|
||||
const DEFAULT_ASSISTANT_ID = "65940acff94777010aa6b796";
|
||||
// zero推理模型智能体ID
|
||||
const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
|
||||
// access_token有效期
|
||||
const ACCESS_TOKEN_EXPIRES = 3600;
|
||||
// 最大重试次数
|
||||
@ -165,13 +167,13 @@ async function removeConversation(
|
||||
*
|
||||
* @param messages 参考gpt系列消息格式,多轮对话请完整提供上下文
|
||||
* @param refreshToken 用于刷新access_token的refresh_token
|
||||
* @param assistantId 智能体ID,默认使用GLM4原版
|
||||
* @param model 模型名称或智能体ID(24位以上小写字母数字视为智能体ID,包含think/zero则使用Zero推理模型),默认使用GLM4原版
|
||||
* @param retryCount 重试次数
|
||||
*/
|
||||
async function createCompletion(
|
||||
messages: any[],
|
||||
refreshToken: string,
|
||||
assistantId = DEFAULT_ASSISTANT_ID,
|
||||
model = MODEL_NAME,
|
||||
refConvId = "",
|
||||
retryCount = 0
|
||||
) {
|
||||
@ -189,6 +191,13 @@ async function createCompletion(
|
||||
// 如果引用对话ID不正确则重置引用
|
||||
if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";
|
||||
|
||||
let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
|
||||
|
||||
if(model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
|
||||
assistantId = ZERO_ASSISTANT_ID;
|
||||
logger.info('使用思考模型');
|
||||
}
|
||||
|
||||
// 请求流
|
||||
const token = await acquireToken(refreshToken);
|
||||
const result = await axios.post(
|
||||
@ -200,8 +209,11 @@ async function createCompletion(
|
||||
meta_data: {
|
||||
channel: "",
|
||||
draft_id: "",
|
||||
if_plus_model: true,
|
||||
input_question_type: "xxxx",
|
||||
is_test: false,
|
||||
platform: "pc",
|
||||
quote_log_id: ""
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -231,7 +243,7 @@ async function createCompletion(
|
||||
|
||||
const streamStartTime = util.timestamp();
|
||||
// 接收流为输出文本
|
||||
const answer = await receiveStream(result.data);
|
||||
const answer = await receiveStream(model, result.data);
|
||||
logger.success(
|
||||
`Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
|
||||
);
|
||||
@ -251,7 +263,7 @@ async function createCompletion(
|
||||
return createCompletion(
|
||||
messages,
|
||||
refreshToken,
|
||||
assistantId,
|
||||
model,
|
||||
refConvId,
|
||||
retryCount + 1
|
||||
);
|
||||
@ -266,13 +278,13 @@ async function createCompletion(
|
||||
*
|
||||
* @param messages 参考gpt系列消息格式,多轮对话请完整提供上下文
|
||||
* @param refreshToken 用于刷新access_token的refresh_token
|
||||
* @param assistantId 智能体ID,默认使用GLM4原版
|
||||
* @param model 模型名称或智能体ID(24位以上小写字母数字视为智能体ID,包含think/zero则使用Zero推理模型),默认使用GLM4原版
|
||||
* @param retryCount 重试次数
|
||||
*/
|
||||
async function createCompletionStream(
|
||||
messages: any[],
|
||||
refreshToken: string,
|
||||
assistantId = DEFAULT_ASSISTANT_ID,
|
||||
model = MODEL_NAME,
|
||||
refConvId = "",
|
||||
retryCount = 0
|
||||
) {
|
||||
@ -290,6 +302,13 @@ async function createCompletionStream(
|
||||
// 如果引用对话ID不正确则重置引用
|
||||
if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";
|
||||
|
||||
let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;
|
||||
|
||||
if(model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
|
||||
assistantId = ZERO_ASSISTANT_ID;
|
||||
logger.info('使用思考模型');
|
||||
}
|
||||
|
||||
// 请求流
|
||||
const token = await acquireToken(refreshToken);
|
||||
const result = await axios.post(
|
||||
@ -301,8 +320,11 @@ async function createCompletionStream(
|
||||
meta_data: {
|
||||
channel: "",
|
||||
draft_id: "",
|
||||
if_plus_model: true,
|
||||
input_question_type: "xxxx",
|
||||
is_test: false,
|
||||
platform: "pc",
|
||||
quote_log_id: ""
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -354,7 +376,7 @@ async function createCompletionStream(
|
||||
|
||||
const streamStartTime = util.timestamp();
|
||||
// 创建转换流将消息格式转换为gpt兼容格式
|
||||
return createTransStream(result.data, (convId: string) => {
|
||||
return createTransStream(model, result.data, (convId: string) => {
|
||||
logger.success(
|
||||
`Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
|
||||
);
|
||||
@ -372,7 +394,7 @@ async function createCompletionStream(
|
||||
return createCompletionStream(
|
||||
messages,
|
||||
refreshToken,
|
||||
assistantId,
|
||||
model,
|
||||
refConvId,
|
||||
retryCount + 1
|
||||
);
|
||||
@ -407,8 +429,11 @@ async function generateImages(
|
||||
meta_data: {
|
||||
channel: "",
|
||||
draft_id: "",
|
||||
if_plus_model: true,
|
||||
input_question_type: "xxxx",
|
||||
is_test: false,
|
||||
platform: "pc",
|
||||
quote_log_id: ""
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -904,14 +929,15 @@ function checkResult(result: AxiosResponse, refreshToken: string) {
|
||||
/**
|
||||
* 从流接收完整的消息内容
|
||||
*
|
||||
* @param model 模型
|
||||
* @param stream 消息流
|
||||
*/
|
||||
async function receiveStream(stream: any): Promise<any> {
|
||||
async function receiveStream(model: string, stream: any): Promise<any> {
|
||||
return new Promise((resolve, reject) => {
|
||||
// 消息初始化
|
||||
const data = {
|
||||
id: "",
|
||||
model: MODEL_NAME,
|
||||
model,
|
||||
object: "chat.completion",
|
||||
choices: [
|
||||
{
|
||||
@ -923,6 +949,8 @@ async function receiveStream(stream: any): Promise<any> {
|
||||
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
|
||||
created: util.unixTimestamp(),
|
||||
};
|
||||
const isSilentModel = model.indexOf('silent') != -1;
|
||||
let thinkingText = "";
|
||||
let toolCall = false;
|
||||
let codeGenerating = false;
|
||||
let textChunkLength = 0;
|
||||
@ -930,6 +958,7 @@ async function receiveStream(stream: any): Promise<any> {
|
||||
let lastExecutionOutput = "";
|
||||
let textOffset = 0;
|
||||
let refContent = "";
|
||||
logger.info(`是否静默模型: ${isSilentModel}`);
|
||||
const parser = createParser((event) => {
|
||||
try {
|
||||
if (event.type !== "event") return;
|
||||
@ -957,6 +986,7 @@ async function receiveStream(stream: any): Promise<any> {
|
||||
textChunkLength = 0;
|
||||
innerStr += "\n";
|
||||
}
|
||||
|
||||
if (type == "text") {
|
||||
if (toolCall) {
|
||||
innerStr += "\n";
|
||||
@ -965,11 +995,20 @@ async function receiveStream(stream: any): Promise<any> {
|
||||
}
|
||||
if (partStatus == "finish") textChunkLength = text.length;
|
||||
return innerStr + text;
|
||||
} else if (
|
||||
} else if (type == "text_thinking" && !isSilentModel) {
|
||||
if (toolCall) {
|
||||
innerStr += "\n";
|
||||
textOffset++;
|
||||
toolCall = false;
|
||||
}
|
||||
thinkingText = text;
|
||||
return innerStr;
|
||||
}else if (
|
||||
type == "quote_result" &&
|
||||
status == "finish" &&
|
||||
meta_data &&
|
||||
_.isArray(meta_data.metadata_list)
|
||||
_.isArray(meta_data.metadata_list) &&
|
||||
!isSilentModel
|
||||
) {
|
||||
refContent = meta_data.metadata_list.reduce((meta, v) => {
|
||||
return meta + `${v.title} - ${v.url}\n`;
|
||||
@ -1032,6 +1071,8 @@ async function receiveStream(stream: any): Promise<any> {
|
||||
);
|
||||
data.choices[0].message.content += chunk;
|
||||
} else {
|
||||
if(thinkingText)
|
||||
data.choices[0].message.content = `[思考开始]\n${thinkingText}[思考结束]\n\n${data.choices[0].message.content}`;
|
||||
data.choices[0].message.content =
|
||||
data.choices[0].message.content.replace(
|
||||
/【\d+†(来源|源|source)】/g,
|
||||
@ -1059,18 +1100,22 @@ async function receiveStream(stream: any): Promise<any> {
|
||||
*
|
||||
* 将流格式转换为gpt兼容流格式
|
||||
*
|
||||
* @param model 模型
|
||||
* @param stream 消息流
|
||||
* @param endCallback 传输结束回调
|
||||
*/
|
||||
function createTransStream(stream: any, endCallback?: Function) {
|
||||
function createTransStream(model: string, stream: any, endCallback?: Function) {
|
||||
// 消息创建时间
|
||||
const created = util.unixTimestamp();
|
||||
// 创建转换流
|
||||
const transStream = new PassThrough();
|
||||
const isSilentModel = model.indexOf('silent') != -1;
|
||||
let content = "";
|
||||
let thinking = false;
|
||||
let toolCall = false;
|
||||
let codeGenerating = false;
|
||||
let textChunkLength = 0;
|
||||
let thinkingText = "";
|
||||
let codeTemp = "";
|
||||
let lastExecutionOutput = "";
|
||||
let textOffset = 0;
|
||||
@ -1078,7 +1123,7 @@ function createTransStream(stream: any, endCallback?: Function) {
|
||||
transStream.write(
|
||||
`data: ${JSON.stringify({
|
||||
id: "",
|
||||
model: MODEL_NAME,
|
||||
model,
|
||||
object: "chat.completion.chunk",
|
||||
choices: [
|
||||
{
|
||||
@ -1116,6 +1161,11 @@ function createTransStream(stream: any, endCallback?: Function) {
|
||||
innerStr += "\n";
|
||||
}
|
||||
if (type == "text") {
|
||||
if(thinking) {
|
||||
innerStr += "[思考结束]\n\n"
|
||||
textOffset = thinkingText.length + 8;
|
||||
thinking = false;
|
||||
}
|
||||
if (toolCall) {
|
||||
innerStr += "\n";
|
||||
textOffset++;
|
||||
@ -1123,11 +1173,26 @@ function createTransStream(stream: any, endCallback?: Function) {
|
||||
}
|
||||
if (partStatus == "finish") textChunkLength = text.length;
|
||||
return innerStr + text;
|
||||
} else if (type == "text_thinking" && !isSilentModel) {
|
||||
if(!thinking) {
|
||||
innerStr += "[思考开始]\n";
|
||||
textOffset = 7;
|
||||
thinking = true;
|
||||
}
|
||||
if (toolCall) {
|
||||
innerStr += "\n";
|
||||
textOffset++;
|
||||
toolCall = false;
|
||||
}
|
||||
if (partStatus == "finish") textChunkLength = text.length;
|
||||
thinkingText += text.substring(thinkingText.length, text.length);
|
||||
return innerStr + text;
|
||||
} else if (
|
||||
type == "quote_result" &&
|
||||
status == "finish" &&
|
||||
meta_data &&
|
||||
_.isArray(meta_data.metadata_list)
|
||||
_.isArray(meta_data.metadata_list) &&
|
||||
!isSilentModel
|
||||
) {
|
||||
const searchText =
|
||||
meta_data.metadata_list.reduce(
|
||||
|
@ -5,6 +5,9 @@ import Response from '@/lib/response/Response.ts';
|
||||
import chat from '@/api/controllers/chat.ts';
|
||||
import logger from '@/lib/logger.ts';
|
||||
|
||||
// zero推理模型智能体ID
|
||||
const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
|
||||
|
||||
export default {
|
||||
|
||||
prefix: '/v1/chat',
|
||||
@ -21,15 +24,15 @@ export default {
|
||||
// 随机挑选一个refresh_token
|
||||
const token = _.sample(tokens);
|
||||
const { model, conversation_id: convId, messages, stream } = request.body;
|
||||
const assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined
|
||||
|
||||
if (stream) {
|
||||
const stream = await chat.createCompletionStream(messages, token, assistantId, convId);
|
||||
const stream = await chat.createCompletionStream(messages, token, model, convId);
|
||||
return new Response(stream, {
|
||||
type: "text/event-stream"
|
||||
});
|
||||
}
|
||||
else
|
||||
return await chat.createCompletion(messages, token, assistantId, convId);
|
||||
return await chat.createCompletion(messages, token, model, convId);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -18,6 +18,11 @@ export default {
|
||||
"object": "model",
|
||||
"owned_by": "glm-free-api"
|
||||
},
|
||||
{
|
||||
"id": "glm-4-plus",
|
||||
"object": "model",
|
||||
"owned_by": "glm-free-api"
|
||||
},
|
||||
{
|
||||
"id": "glm-4v",
|
||||
"object": "model",
|
||||
|
Loading…
x
Reference in New Issue
Block a user