Skip to content

Commit 550a7ce

Browse files
authored
Revert "feat: Add the trainer for generative HF models" (#21)
* Revert "feat: add the 4-bit quantisation option and remove unnecessary base model copying" This reverts commit d03f487.
* Revert "feat: add the option to include rewards metrics" This reverts commit d1ff2fb.
* Revert "fix: use the GRPO trainer for evaluation" This reverts commit 5075fa3.
* Revert "feat: add the trainer for HF LLMs" This reverts commit 994f88d.
* Revert "feat: add the text embedding endpoint for LLM serving" This reverts commit d760986.
* Revert "feat: add the chat template factory" This reverts commit 587e9ea.
* Revert "feat: handle the deprecated default chat template" This reverts commit 6c05ee1.
* Revert "feat: add metrics for usages of prompt and completion tokens" This reverts commit b338b4f.
* Revert "feat: add the endpoint compatible with OpenAI client protocols" This reverts commit bcea8fe.
1 parent d03f487 commit 550a7ce

27 files changed

+112
-2813
lines changed

.github/workflows/main.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
python-version: ${{ matrix.python-version }}
2525
- name: Install dependencies
2626
run: |
27-
uv sync --group dev --group docs
27+
uv sync --group dev --group docs --group vllm
2828
- name: Check types
2929
run: |
3030
uv run mypy app

app/api/api.py

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os.path
55
import app.api.globals as cms_globals
66

7-
from typing import Dict, Any, Optional, Union, Type
7+
from typing import Dict, Any, Optional
88
from concurrent.futures import ThreadPoolExecutor
99
from anyio.lowlevel import RunVar
1010
from anyio import CapacityLimiter
@@ -20,7 +20,7 @@
2020
from app.api.dependencies import ModelServiceDep
2121
from app.api.utils import add_exception_handlers, add_rate_limiter, init_vllm_engine
2222
from app.config import Settings
23-
from app.domain import Tags, TagsStreamable, TagsGenerative
23+
from app.domain import Tags, TagsStreamable
2424
from app.management.tracker_client import TrackerClient
2525
from app.utils import get_settings, unpack_model_data_package, get_model_data_package_base_name
2626
from app.exception import ConfigurationException
@@ -131,11 +131,6 @@ def get_generative_server(config: Settings, msd_overwritten: Optional[ModelServi
131131
app = _load_health_check_router(app)
132132
logger.debug("Health check router loaded")
133133

134-
if config.ENABLE_TRAINING_APIS == "true":
135-
app = _load_supervised_training_router(app)
136-
logger.debug("Supervised training router loaded")
137-
app = _load_training_operations(app)
138-
139134
if config.AUTH_USER_ENABLED == "true":
140135
app = _load_auth_router(app)
141136
logger.debug("Auth router loaded")
@@ -203,18 +198,11 @@ def _get_app(
203198
streamable: bool = False,
204199
generative: bool = False,
205200
) -> FastAPI:
206-
config = get_settings()
207-
tags: Union[Type[Tags], Type[TagsStreamable], Type[TagsGenerative]]
208-
if generative:
209-
tags = TagsGenerative
210-
elif streamable:
211-
tags = TagsStreamable
212-
else:
213-
tags = Tags
214201
tags_metadata = [{ # type: ignore
215-
"name": tag.name, # type: ignore
216-
"description": tag.value # type: ignore
217-
} for tag in tags]
202+
"name": tag.name,
203+
"description": tag.value
204+
} for tag in (Tags if not streamable else TagsStreamable)]
205+
config = get_settings()
218206
app = FastAPI(
219207
title="CogStack ModelServe",
220208
summary="A model serving and governance system for CogStack NLP solutions",

0 commit comments

Comments
 (0)