Skip to content

Commit 47f518a

Browse files
committed
feat: add LLM support for GLM-4 series
1 parent 082a9a4 commit 47f518a

File tree

13 files changed

+79
-272
lines changed

13 files changed

+79
-272
lines changed

aperag/llm/base.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ def match_predictor(model_name, predictor_type, kwargs):
8383
case "deepseek-chat" | "gpt-4-1106-preview" | "gpt-4-vision-preview" | "gpt-4" | "gpt-4-32k" | "gpt-4-0613" | "gpt-4-32k-0613":
8484
from aperag.llm.openai import OpenAIPredictor
8585
return OpenAIPredictor
86+
case "glm-4-plus" | "glm-4-air" | "glm-4-long" | "glm-4-flashx" | "glm-4-flash":
87+
from aperag.llm.openai import OpenAIPredictor
88+
return OpenAIPredictor
8689
case "azure-openai":
8790
from aperag.llm.azure import AzureOpenAIPredictor
8891
return AzureOpenAIPredictor
@@ -92,10 +95,6 @@ def match_predictor(model_name, predictor_type, kwargs):
9295
case "ernie-bot-turbo":
9396
from aperag.llm.wenxin import BaiduQianFan
9497
return BaiduQianFan
95-
case "chatglm-pro" | "chatglm-std" | "chatglm-lite" | "chatglm-turbo":
96-
kwargs["model"] = model_name.replace("-", "_")
97-
from aperag.llm.chatglm import ChatGLMPredictor
98-
return ChatGLMPredictor
9998
case "qwen-turbo" | "qwen-plus" | "qwen-max":
10099
from aperag.llm.qianwen import QianWenPredictor
101100
return QianWenPredictor

aperag/llm/chatglm.py

Lines changed: 0 additions & 135 deletions
This file was deleted.

aperag/llm/test_agenerate_stream.py

Lines changed: 0 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
from base import KubeBlocksLLMPredictor
2323

2424
from aperag.llm.baichuan import BaiChuanPredictor
25-
from aperag.llm.chatglm import ChatGLMPredictor
2625
from aperag.llm.custom import CustomLLMPredictor
2726
from aperag.llm.openai import OpenAIPredictor
2827
from aperag.llm.wenxin import BaiduQianFan
@@ -306,80 +305,5 @@ async def test_stream_behavior():
306305
self.assertEqual(task_order_log, ["task2 completed.", "task1 completed."])
307306

308307

309-
class TestChatGLMPredictor(unittest.IsolatedAsyncioTestCase):
310-
311-
async def test_stream_async_behabior(self):
312-
predictor1 = ChatGLMPredictor(api_key="id.secret", endpoint="http://192.0.2.0")
313-
predictor2 = ChatGLMPredictor(api_key="id.secret")
314-
315-
task_order_log = []
316-
317-
async def async_task():
318-
319-
try:
320-
_ = [tokens async for tokens in predictor1.agenerate_stream(prompt="test")]
321-
except aiohttp.ClientConnectorError:
322-
pass
323-
task_order_log.append("task1 completed.") # 在尝试连接多次(超过60s)后结束,打印log信息
324-
325-
async def test_stream_behavior():
326-
mock_response = [
327-
"id: fb981fde-0080-4933-b87b-4a29eaba8d17",
328-
"event: add",
329-
"data: Kubernetes的核心技术",
330-
""
331-
"id: fb981fde-0080-4933-b87b-4a29eaba8d17",
332-
"event: add",
333-
"data: Service的作用是防止Pod",
334-
""
335-
"id: fb981fde-0080-4933-b87b-4a29eaba8d17",
336-
"event: add",
337-
"data: 失联(服务发现)",
338-
""
339-
"id: fb981fde-0080-4933-b87b-4a29eaba8d17",
340-
"event: add",
341-
"data: 和定义Pod访问策略",
342-
""
343-
"id: fb981fde-0080-4933-b87b-4a29eaba8d17",
344-
"event: add",
345-
"data: (负载均衡)。",
346-
""
347-
"id: fb981fde-0080-4933-b87b-4a29eaba8d17",
348-
"event: finish",
349-
]
350-
351-
mock_responses = [
352-
"Kubernetes的核心技术",
353-
"Service的作用是防止Pod",
354-
"失联(服务发现)",
355-
"和定义Pod访问策略",
356-
"(负载均衡)。"
357-
]
358-
359-
# 将数据转换为字节流,每一行后面都有一个换行符
360-
mock_content = "\n".join(mock_response).encode("utf-8")
361-
362-
url = "https://open.bigmodel.cn/api/paas/v3/model-api/chatglm_lite/sse-invoke?temperature=0.95&top_p=0.7"
363-
prompt = "test prompt"
364-
365-
with aioresponses() as mocked:
366-
mocked.post(url=url, body=mock_content, status=200)
367-
368-
response_list = []
369-
async for resp in predictor2._agenerate_stream(prompt=prompt):
370-
response_list.append(resp)
371-
372-
self.assertEqual(response_list, mock_responses)
373-
task_order_log.append("task2 completed.") # 任务执行结束,打印日志信息
374-
375-
# 使用gather同时启动两个任务
376-
# 如果agenerate_stream是异步的,那么在task1多次尝试连接期间,task2就已经在执行中了
377-
# 那么task2一定比task1先结束: (1和2同时开始)---2结束--------1结束
378-
379-
_, _ = await asyncio.gather(async_task(), test_stream_behavior())
380-
381-
self.assertEqual(task_order_log, ["task2 completed.", "task1 completed."])
382-
383-
384308
if __name__ == "__main__":
385309
unittest.main()

aperag/views/main.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ def list_models(request):
178178
"enabled": model_server.get("enabled", "true").lower() == "true",
179179
"memory": model_server.get("memory", "disabled").lower() == "enabled",
180180
"free_tier": model_server.get("free_tier", False),
181+
"endpoint": model_server.get("endpoint", ""),
181182
"default_token": Predictor.check_default_token(model_name=model_server["name"]),
182183
"prompt_template": DEFAULT_MODEL_MEMOTY_PROMPT_TEMPLATES.get(model_server["name"],
183184
DEFAULT_CHINESE_PROMPT_TEMPLATE_V3),
@@ -434,7 +435,7 @@ async def update_collection(request, collection_id, collection: CollectionIn):
434435
bot_ids = []
435436
async for bot in bots:
436437
bot_ids.append(bot.id)
437-
438+
438439
return success(instance.view(bot_ids=bot_ids))
439440

440441

@@ -467,27 +468,27 @@ async def create_questions(request, collection_id):
467468
return fail(HTTPStatus.NOT_FOUND, "Collection not found")
468469
if collection.status == CollectionStatus.QUESTION_PENDING:
469470
return fail(HTTPStatus.BAD_REQUEST, "Collection is generating questions")
470-
471+
471472
collection.status = CollectionStatus.QUESTION_PENDING
472473
await collection.asave()
473-
474+
474475
documents = await sync_to_async(collection.document_set.exclude)(status=DocumentStatus.DELETED)
475476
generate_tasks = []
476477
async for document in documents:
477478
generate_tasks.append(generate_questions.si(document.id))
478479
generate_group = group(*generate_tasks)
479480
callback_chain = chain(generate_group, update_collection_status.s(collection.id))
480481
callback_chain.delay()
481-
482-
return success({})
482+
483+
return success({})
483484

484485
@router.put("/collections/{collection_id}/questions")
485486
async def update_question(request, collection_id, question_in: QuestionIn):
486487
user = get_user(request)
487488
collection = await query_collection(user, collection_id)
488489
if collection is None:
489490
return fail(HTTPStatus.NOT_FOUND, "Collection not found")
490-
491+
491492
    # create question
492493
if not question_in.id:
493494
question_instance = Question(
@@ -499,13 +500,13 @@ async def update_question(request, collection_id, question_in: QuestionIn):
499500
else:
500501
question_instance = await query_question(user, question_in.id)
501502
if question_instance is None:
502-
return fail(HTTPStatus.NOT_FOUND, "Question not found")
503-
503+
return fail(HTTPStatus.NOT_FOUND, "Question not found")
504+
504505
question_instance.question = question_in.question
505506
question_instance.answer = question_in.answer if question_in.answer else ""
506507
question_instance.status = QuestionStatus.PENDING
507508
await sync_to_async(question_instance.documents.clear)()
508-
509+
509510
if question_in.relate_documents:
510511
for document_id in question_in.relate_documents:
511512
document = await query_document(user, collection_id, document_id)
@@ -688,7 +689,7 @@ async def update_document(
688689
await instance.asave()
689690
# if user add labels for a document, we need to update index
690691
update_index_for_document.delay(instance.id)
691-
692+
692693
related_questions = await sync_to_async(document.question_set.exclude)(status=QuestionStatus.DELETED)
693694
async for question in related_questions:
694695
question.status = QuestionStatus.WARNING
@@ -712,13 +713,13 @@ async def delete_document(request, collection_id, document_id):
712713
await document.asave()
713714

714715
remove_index.delay(document.id)
715-
716+
716717
related_questions = await sync_to_async(document.question_set.exclude)(status=QuestionStatus.DELETED)
717718
async for question in related_questions:
718719
question.documents.remove(document)
719720
question.status = QuestionStatus.WARNING
720721
await question.asave()
721-
722+
722723
return success(document.view())
723724

724725

@@ -736,13 +737,13 @@ async def delete_documents(request, collection_id, document_ids: List[str]):
736737
document.gmt_deleted = timezone.now()
737738
await document.asave()
738739
remove_index.delay(document.id)
739-
740+
740741
related_questions = await sync_to_async(document.question_set.exclude)(status=QuestionStatus.DELETED)
741742
async for question in related_questions:
742743
question.documents.remove(document)
743744
question.status = QuestionStatus.WARNING
744745
await question.asave()
745-
746+
746747
ok.append(document.id)
747748
except Exception as e:
748749
logger.exception(e)

0 commit comments

Comments
 (0)