diff --git a/databricks/sdk/mixins/open_ai_client.py b/databricks/sdk/mixins/open_ai_client.py index a86827128..87de38a1e 100644 --- a/databricks/sdk/mixins/open_ai_client.py +++ b/databricks/sdk/mixins/open_ai_client.py @@ -1,4 +1,9 @@ -from databricks.sdk.service.serving import ServingEndpointsAPI +import json as js +from typing import Dict, Optional + +from databricks.sdk.service.serving import (ExternalFunctionRequestHttpMethod, + ExternalFunctionResponse, + ServingEndpointsAPI) class ServingEndpointsExt(ServingEndpointsAPI): @@ -50,3 +55,40 @@ def get_langchain_chat_open_ai_client(self, model): openai_api_base=self._api._cfg.host + "/serving-endpoints", api_key="no-token", # Passing in a placeholder to pass validations, this will not be used http_client=self._get_authorized_http_client()) + + def http_request(self, + conn: str, + method: ExternalFunctionRequestHttpMethod, + path: str, + *, + headers: Optional[Dict[str, str]] = None, + json: Optional[Dict[str, str]] = None, + params: Optional[Dict[str, str]] = None) -> ExternalFunctionResponse: + """Make external services call using the credentials stored in UC Connection. + + **NOTE:** Experimental: This API may change or be removed in a future release without warning. + + :param conn: str + The connection name to use. This is required to identify the external connection. + :param method: :class:`ExternalFunctionRequestHttpMethod` + The HTTP method to use (e.g., 'GET', 'POST'). This is required. + :param path: str + The relative path for the API endpoint. This is required. + :param headers: Dict[str,str] (optional) + Additional headers for the request. If not provided, only auth headers from connections would be + passed. + :param json: Dict[str,str] (optional) + JSON payload for the request. + :param params: Dict[str,str] (optional) + Query parameters for the request. 
+ + :returns: :class:`ExternalFunctionResponse` + """ + + return super().http_request(connection_name=conn, + method=method, + path=path, + headers=js.dumps(headers) if headers is not None else None, + json=js.dumps(json) if json is not None else None, + params=js.dumps(params) if params is not None else None, + ) diff --git a/databricks/sdk/service/serving.py b/databricks/sdk/service/serving.py index 1ada305cd..6ea85afe3 100755 --- a/databricks/sdk/service/serving.py +++ b/databricks/sdk/service/serving.py @@ -145,11 +145,8 @@ def from_dict(cls, d: Dict[str, any]) -> AiGatewayGuardrailParameters: @dataclass class AiGatewayGuardrailPiiBehavior: - behavior: AiGatewayGuardrailPiiBehaviorBehavior - """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input - guardrail and the request contains PII, the request is not sent to the model server and 400 - status code is returned; if 'BLOCK' is set for the output guardrail and the model response - contains PII, the PII info in the response is redacted and 400 status code is returned.""" + behavior: Optional[AiGatewayGuardrailPiiBehaviorBehavior] = None + """Configuration for input guardrail filters.""" def as_dict(self) -> dict: """Serializes the AiGatewayGuardrailPiiBehavior into a dictionary suitable for use as a JSON request body.""" @@ -170,10 +167,6 @@ def from_dict(cls, d: Dict[str, any]) -> AiGatewayGuardrailPiiBehavior: class AiGatewayGuardrailPiiBehaviorBehavior(Enum): - """Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input - guardrail and the request contains PII, the request is not sent to the model server and 400 - status code is returned; if 'BLOCK' is set for the output guardrail and the model response - contains PII, the PII info in the response is redacted and 400 status code is returned.""" BLOCK = 'BLOCK' NONE = 'NONE' @@ -289,15 +282,12 @@ def from_dict(cls, d: Dict[str, any]) -> AiGatewayRateLimit: class AiGatewayRateLimitKey(Enum): - """Key field for a rate limit. 
Currently, only 'user' and 'endpoint' are supported, with 'endpoint' - being the default if not specified.""" ENDPOINT = 'endpoint' USER = 'user' class AiGatewayRateLimitRenewalPeriod(Enum): - """Renewal period field for a rate limit. Currently, only 'minute' is supported.""" MINUTE = 'minute' @@ -336,9 +326,9 @@ class AmazonBedrockConfig: aws_access_key_id: Optional[str] = None """The Databricks secret key reference for an AWS access key ID with permissions to interact with - Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You - must provide an API key using one of the following fields: `aws_access_key_id` or - `aws_access_key_id_plaintext`.""" + Bedrock services. If you prefer to paste your API key directly, see + `aws_access_key_id_plaintext`. You must provide an API key using one of the following fields: + `aws_access_key_id` or `aws_access_key_id_plaintext`.""" aws_access_key_id_plaintext: Optional[str] = None """An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext @@ -396,8 +386,6 @@ def from_dict(cls, d: Dict[str, any]) -> AmazonBedrockConfig: class AmazonBedrockConfigBedrockProvider(Enum): - """The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: - Anthropic, Cohere, AI21Labs, Amazon.""" AI21LABS = 'ai21labs' AMAZON = 'amazon' @@ -487,18 +475,21 @@ def from_dict(cls, d: Dict[str, any]) -> AutoCaptureConfigInput: @dataclass class AutoCaptureConfigOutput: catalog_name: Optional[str] = None - """The name of the catalog in Unity Catalog.""" + """The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if + the inference table is already enabled.""" enabled: Optional[bool] = None """Indicates whether the inference table is enabled.""" schema_name: Optional[str] = None - """The name of the schema in Unity Catalog.""" + """The name of the schema in Unity Catalog. 
NOTE: On update, you cannot change the schema name if + the inference table is already enabled.""" state: Optional[AutoCaptureState] = None table_name_prefix: Optional[str] = None - """The prefix of the table in Unity Catalog.""" + """The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if + the inference table is already enabled.""" def as_dict(self) -> dict: """Serializes the AutoCaptureConfigOutput into a dictionary suitable for use as a JSON request body.""" @@ -663,8 +654,8 @@ class CreateServingEndpoint: """The core config of the serving endpoint.""" ai_gateway: Optional[AiGatewayConfig] = None - """The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are - supported as of now.""" + """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned + throughput endpoints are currently supported.""" rate_limits: Optional[List[RateLimit]] = None """Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI @@ -879,21 +870,22 @@ class EmbeddingsV1ResponseEmbeddingElementObject(Enum): class EndpointCoreConfigInput: auto_capture_config: Optional[AutoCaptureConfigInput] = None """Configuration for Inference Tables which automatically logs requests and responses to Unity - Catalog.""" + Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or + updating existing provisioned throughput endpoints that never have inference table configured; + in these cases please use AI Gateway to manage inference tables.""" name: Optional[str] = None """The name of the serving endpoint to update. This field is required.""" served_entities: Optional[List[ServedEntityInput]] = None - """A list of served entities for the endpoint to serve. 
A serving endpoint can have up to 15 served - entities.""" + """The list of served entities under the serving endpoint config.""" served_models: Optional[List[ServedModelInput]] = None - """(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A - serving endpoint can have up to 15 served models.""" + """(Deprecated, use served_entities instead) The list of served models under the serving endpoint + config.""" traffic_config: Optional[TrafficConfig] = None - """The traffic config defining how invocations to the serving endpoint should be routed.""" + """The traffic configuration associated with the serving endpoint config.""" def as_dict(self) -> dict: """Serializes the EndpointCoreConfigInput into a dictionary suitable for use as a JSON request body.""" @@ -929,7 +921,9 @@ def from_dict(cls, d: Dict[str, any]) -> EndpointCoreConfigInput: class EndpointCoreConfigOutput: auto_capture_config: Optional[AutoCaptureConfigOutput] = None """Configuration for Inference Tables which automatically logs requests and responses to Unity - Catalog.""" + Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or + updating existing provisioned throughput endpoints that never have inference table configured; + in these cases please use AI Gateway to manage inference tables.""" config_version: Optional[int] = None """The config version that the serving endpoint is currently serving.""" @@ -1008,7 +1002,9 @@ def from_dict(cls, d: Dict[str, any]) -> EndpointCoreConfigSummary: class EndpointPendingConfig: auto_capture_config: Optional[AutoCaptureConfigOutput] = None """Configuration for Inference Tables which automatically logs requests and responses to Unity - Catalog.""" + Catalog. 
Note: this field is deprecated for creating new provisioned throughput endpoints, or + updating existing provisioned throughput endpoints that never have inference table configured; + in these cases please use AI Gateway to manage inference tables.""" config_version: Optional[int] = None """The config version that the serving endpoint is currently serving.""" @@ -1094,10 +1090,6 @@ def from_dict(cls, d: Dict[str, any]) -> EndpointState: class EndpointStateConfigUpdate(Enum): - """The state of an endpoint's config update. This informs the user if the pending_config is in - progress, if the update failed, or if there is no update in progress. Note that if the - endpoint's config_update state value is IN_PROGRESS, another update can not be made until the - update completes or fails.""" IN_PROGRESS = 'IN_PROGRESS' NOT_UPDATING = 'NOT_UPDATING' @@ -1106,9 +1098,6 @@ class EndpointStateConfigUpdate(Enum): class EndpointStateReady(Enum): - """The state of an endpoint, indicating whether or not the endpoint is queryable. An endpoint is - READY if all of the served entities in its active configuration are ready. 
If any of the - actively served entities are in a non-ready state, the endpoint state will be NOT_READY.""" NOT_READY = 'NOT_READY' READY = 'READY' @@ -1142,6 +1131,28 @@ def from_dict(cls, d: Dict[str, any]) -> EndpointTag: return cls(key=d.get('key', None), value=d.get('value', None)) +@dataclass +class EndpointTags: + tags: Optional[List[EndpointTag]] = None + + def as_dict(self) -> dict: + """Serializes the EndpointTags into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.tags: body['tags'] = [v.as_dict() for v in self.tags] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the EndpointTags into a shallow dictionary of its immediate attributes.""" + body = {} + if self.tags: body['tags'] = self.tags + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> EndpointTags: + """Deserializes the EndpointTags from a dictionary.""" + return cls(tags=_repeated_dict(d, 'tags', EndpointTag)) + + @dataclass class ExportMetricsResponse: contents: Optional[BinaryIO] = None @@ -1164,12 +1175,105 @@ def from_dict(cls, d: Dict[str, any]) -> ExportMetricsResponse: return cls(contents=d.get('contents', None)) +@dataclass +class ExternalFunctionRequest: + """Simple Proto message for testing""" + + connection_name: str + """The connection name to use. This is required to identify the external connection.""" + + method: ExternalFunctionRequestHttpMethod + """The HTTP method to use (e.g., 'GET', 'POST').""" + + path: str + """The relative path for the API endpoint. This is required.""" + + headers: Optional[str] = None + """Additional headers for the request. 
If not provided, only auth headers from connections would be + passed.""" + + json: Optional[str] = None + """The JSON payload to send in the request body.""" + + params: Optional[str] = None + """Query parameters for the request.""" + + def as_dict(self) -> dict: + """Serializes the ExternalFunctionRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.connection_name is not None: body['connection_name'] = self.connection_name + if self.headers is not None: body['headers'] = self.headers + if self.json is not None: body['json'] = self.json + if self.method is not None: body['method'] = self.method.value + if self.params is not None: body['params'] = self.params + if self.path is not None: body['path'] = self.path + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ExternalFunctionRequest into a shallow dictionary of its immediate attributes.""" + body = {} + if self.connection_name is not None: body['connection_name'] = self.connection_name + if self.headers is not None: body['headers'] = self.headers + if self.json is not None: body['json'] = self.json + if self.method is not None: body['method'] = self.method + if self.params is not None: body['params'] = self.params + if self.path is not None: body['path'] = self.path + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ExternalFunctionRequest: + """Deserializes the ExternalFunctionRequest from a dictionary.""" + return cls(connection_name=d.get('connection_name', None), + headers=d.get('headers', None), + json=d.get('json', None), + method=_enum(d, 'method', ExternalFunctionRequestHttpMethod), + params=d.get('params', None), + path=d.get('path', None)) + + +class ExternalFunctionRequestHttpMethod(Enum): + + DELETE = 'DELETE' + GET = 'GET' + PATCH = 'PATCH' + POST = 'POST' + PUT = 'PUT' + + +@dataclass +class ExternalFunctionResponse: + status_code: Optional[int] = None + """The HTTP status code of the response""" + + text: 
Optional[str] = None + """The content of the response""" + + def as_dict(self) -> dict: + """Serializes the ExternalFunctionResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.status_code is not None: body['status_code'] = self.status_code + if self.text is not None: body['text'] = self.text + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ExternalFunctionResponse into a shallow dictionary of its immediate attributes.""" + body = {} + if self.status_code is not None: body['status_code'] = self.status_code + if self.text is not None: body['text'] = self.text + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> ExternalFunctionResponse: + """Deserializes the ExternalFunctionResponse from a dictionary.""" + return cls(status_code=d.get('status_code', None), text=d.get('text', None)) + + @dataclass class ExternalModel: provider: ExternalModelProvider """The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', - 'google-cloud-vertex-ai', 'openai', and 'palm'.",""" + 'google-cloud-vertex-ai', 'openai', and 'palm'.""" name: str """The name of the external model.""" @@ -1256,9 +1360,6 @@ def from_dict(cls, d: Dict[str, any]) -> ExternalModel: class ExternalModelProvider(Enum): - """The name of the provider for the external model. 
Currently, the supported providers are - 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', - 'google-cloud-vertex-ai', 'openai', and 'palm'.",""" AI21LABS = 'ai21labs' AMAZON_BEDROCK = 'amazon-bedrock' @@ -1307,17 +1408,16 @@ def from_dict(cls, d: Dict[str, any]) -> ExternalModelUsageElement: @dataclass class FoundationModel: + """All fields are not sensitive as they are hard-coded in the system and made available to + customers.""" + description: Optional[str] = None - """The description of the foundation model.""" display_name: Optional[str] = None - """The display name of the foundation model.""" docs: Optional[str] = None - """The URL to the documentation of the foundation model.""" name: Optional[str] = None - """The name of the foundation model.""" def as_dict(self) -> dict: """Serializes the FoundationModel into a dictionary suitable for use as a JSON request body.""" @@ -1348,23 +1448,24 @@ def from_dict(cls, d: Dict[str, any]) -> FoundationModel: @dataclass class GetOpenApiResponse: - """The response is an OpenAPI spec in JSON format that typically includes fields like openapi, - info, servers and paths, etc.""" + contents: Optional[BinaryIO] = None def as_dict(self) -> dict: """Serializes the GetOpenApiResponse into a dictionary suitable for use as a JSON request body.""" body = {} + if self.contents: body['contents'] = self.contents return body def as_shallow_dict(self) -> dict: """Serializes the GetOpenApiResponse into a shallow dictionary of its immediate attributes.""" body = {} + if self.contents: body['contents'] = self.contents return body @classmethod def from_dict(cls, d: Dict[str, any]) -> GetOpenApiResponse: """Deserializes the GetOpenApiResponse from a dictionary.""" - return cls() + return cls(contents=d.get('contents', None)) @dataclass @@ -1393,13 +1494,23 @@ def from_dict(cls, d: Dict[str, any]) -> GetServingEndpointPermissionLevelsRespo @dataclass class GoogleCloudVertexAiConfig: + project_id: str + """This 
is the Google Cloud project id that the service account is associated with.""" + + region: str + """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more + details. Some models are only available in specific regions. + + [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations""" + private_key: Optional[str] = None """The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys]. If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext` - [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys""" + [Best practices for managing service account keys]: + https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys""" private_key_plaintext: Optional[str] = None """The private key for the service account which has access to the Google Cloud Vertex AI Service @@ -1407,16 +1518,8 @@ class GoogleCloudVertexAiConfig: prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`. - [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys""" - - project_id: Optional[str] = None - """This is the Google Cloud project id that the service account is associated with.""" - - region: Optional[str] = None - """This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more - details. Some models are only available in specific regions. 
- - [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations""" + [Best practices for managing service account keys]: + https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys""" def as_dict(self) -> dict: """Serializes the GoogleCloudVertexAiConfig into a dictionary suitable for use as a JSON request body.""" @@ -1493,6 +1596,8 @@ def from_dict(cls, d: Dict[str, any]) -> ModelDataPlaneInfo: @dataclass class OpenAiConfig: + """Configs needed to create an OpenAI model route.""" + microsoft_entra_client_id: Optional[str] = None """This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.""" @@ -1678,13 +1783,10 @@ def from_dict(cls, d: Dict[str, any]) -> PatchServingEndpointTags: @dataclass class PayloadTable: name: Optional[str] = None - """The name of the payload table.""" status: Optional[str] = None - """The status of the payload table.""" status_message: Optional[str] = None - """The status message of the payload table.""" def as_dict(self) -> dict: """Serializes the PayloadTable into a dictionary suitable for use as a JSON request body.""" @@ -1710,6 +1812,57 @@ def from_dict(cls, d: Dict[str, any]) -> PayloadTable: status_message=d.get('status_message', None)) +@dataclass +class PutAiGatewayRequest: + guardrails: Optional[AiGatewayGuardrails] = None + """Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and + responses.""" + + inference_table_config: Optional[AiGatewayInferenceTableConfig] = None + """Configuration for payload logging using inference tables. Use these tables to monitor and audit + data being sent to and received from model APIs and to improve model quality.""" + + name: Optional[str] = None + """The name of the serving endpoint whose AI Gateway is being updated. 
This field is required.""" + + rate_limits: Optional[List[AiGatewayRateLimit]] = None + """Configuration for rate limits which can be set to limit endpoint traffic.""" + + usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None + """Configuration to enable usage tracking using system tables. These tables allow you to monitor + operational usage on endpoints and their associated costs.""" + + def as_dict(self) -> dict: + """Serializes the PutAiGatewayRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.guardrails: body['guardrails'] = self.guardrails.as_dict() + if self.inference_table_config: body['inference_table_config'] = self.inference_table_config.as_dict() + if self.name is not None: body['name'] = self.name + if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits] + if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config.as_dict() + return body + + def as_shallow_dict(self) -> dict: + """Serializes the PutAiGatewayRequest into a shallow dictionary of its immediate attributes.""" + body = {} + if self.guardrails: body['guardrails'] = self.guardrails + if self.inference_table_config: body['inference_table_config'] = self.inference_table_config + if self.name is not None: body['name'] = self.name + if self.rate_limits: body['rate_limits'] = self.rate_limits + if self.usage_tracking_config: body['usage_tracking_config'] = self.usage_tracking_config + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> PutAiGatewayRequest: + """Deserializes the PutAiGatewayRequest from a dictionary.""" + return cls(guardrails=_from_dict(d, 'guardrails', AiGatewayGuardrails), + inference_table_config=_from_dict(d, 'inference_table_config', + AiGatewayInferenceTableConfig), + name=d.get('name', None), + rate_limits=_repeated_dict(d, 'rate_limits', AiGatewayRateLimit), + usage_tracking_config=_from_dict(d, 'usage_tracking_config', 
AiGatewayUsageTrackingConfig)) + + @dataclass class PutAiGatewayResponse: guardrails: Optional[AiGatewayGuardrails] = None @@ -1718,7 +1871,7 @@ class PutAiGatewayResponse: inference_table_config: Optional[AiGatewayInferenceTableConfig] = None """Configuration for payload logging using inference tables. Use these tables to monitor and audit - data being sent to and received from model APIs and to improve model quality .""" + data being sent to and received from model APIs and to improve model quality.""" rate_limits: Optional[List[AiGatewayRateLimit]] = None """Configuration for rate limits which can be set to limit endpoint traffic.""" @@ -1755,6 +1908,34 @@ def from_dict(cls, d: Dict[str, any]) -> PutAiGatewayResponse: usage_tracking_config=_from_dict(d, 'usage_tracking_config', AiGatewayUsageTrackingConfig)) +@dataclass +class PutRequest: + name: Optional[str] = None + """The name of the serving endpoint whose rate limits are being updated. This field is required.""" + + rate_limits: Optional[List[RateLimit]] = None + """The list of endpoint rate limits.""" + + def as_dict(self) -> dict: + """Serializes the PutRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.name is not None: body['name'] = self.name + if self.rate_limits: body['rate_limits'] = [v.as_dict() for v in self.rate_limits] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the PutRequest into a shallow dictionary of its immediate attributes.""" + body = {} + if self.name is not None: body['name'] = self.name + if self.rate_limits: body['rate_limits'] = self.rate_limits + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> PutRequest: + """Deserializes the PutRequest from a dictionary.""" + return cls(name=d.get('name', None), rate_limits=_repeated_dict(d, 'rate_limits', RateLimit)) + + @dataclass class PutResponse: rate_limits: Optional[List[RateLimit]] = None @@ -2020,15 +2201,12 @@ def from_dict(cls, d: Dict[str, any]) 
-> RateLimit: class RateLimitKey(Enum): - """Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are - supported, with 'endpoint' being the default if not specified.""" ENDPOINT = 'endpoint' USER = 'user' class RateLimitRenewalPeriod(Enum): - """Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.""" MINUTE = 'minute' @@ -2069,11 +2247,9 @@ class ServedEntityInput: """The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of - __catalog_name__.__schema_name__.__model_name__.""" + **catalog_name.schema_name.model_name**.""" entity_version: Optional[str] = None - """The version of the model in Databricks Model Registry to be served or empty if the entity is a - FEATURE_SPEC.""" environment_vars: Optional[Dict[str, str]] = None """An object containing a set of optional, user-specified environment variable key-value pairs used @@ -2102,7 +2278,7 @@ class ServedEntityInput: """The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' 
and ':' replaced with '-', and if - not specified for other entities, it defaults to -.""" + not specified for other entities, it defaults to entity_name-entity_version.""" scale_to_zero_enabled: Optional[bool] = None """Whether the compute resources for the served entity should scale down to zero.""" @@ -2115,13 +2291,13 @@ class ServedEntityInput: scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.""" - workload_type: Optional[str] = None + workload_type: Optional[ServingModelWorkloadType] = None """The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types]. - [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" + [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" def as_dict(self) -> dict: """Serializes the ServedEntityInput into a dictionary suitable for use as a JSON request body.""" @@ -2138,7 +2314,7 @@ def as_dict(self) -> dict: if self.name is not None: body['name'] = self.name if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled if self.workload_size is not None: body['workload_size'] = self.workload_size - if self.workload_type is not None: body['workload_type'] = self.workload_type + if self.workload_type is not None: body['workload_type'] = self.workload_type.value return body def as_shallow_dict(self) -> dict: @@ -2172,26 +2348,22 @@ def from_dict(cls, d: Dict[str, any]) -> ServedEntityInput: name=d.get('name', None), scale_to_zero_enabled=d.get('scale_to_zero_enabled', None), workload_size=d.get('workload_size', None), - 
workload_type=d.get('workload_type', None)) + workload_type=_enum(d, 'workload_type', ServingModelWorkloadType)) @dataclass class ServedEntityOutput: creation_timestamp: Optional[int] = None - """The creation timestamp of the served entity in Unix time.""" creator: Optional[str] = None - """The email of the user who created the served entity.""" entity_name: Optional[str] = None - """The name of the entity served. The entity may be a model in the Databricks Model Registry, a - model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC - object, the full name of the object is given in the form of - __catalog_name__.__schema_name__.__model_name__.""" + """The name of the entity to be served. The entity may be a model in the Databricks Model Registry, + a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC + object, the full name of the object should be given in the form of + **catalog_name.schema_name.model_name**.""" entity_version: Optional[str] = None - """The version of the served entity in Databricks Model Registry or empty if the entity is a - FEATURE_SPEC.""" environment_vars: Optional[Dict[str, str]] = None """An object containing a set of optional, user-specified environment variable key-value pairs used @@ -2200,14 +2372,16 @@ class ServedEntityOutput: "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`""" external_model: Optional[ExternalModel] = None - """The external model that is served. NOTE: Only one of external_model, foundation_model, and - (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) is - returned based on the endpoint type.""" + """The external model to be served. NOTE: Only one of external_model and (entity_name, + entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with + the latter set being used for custom model serving for a Databricks registered model. 
For an + existing endpoint with external_model, it cannot be updated to an endpoint without + external_model. If the endpoint is created without external_model, users cannot update it to add + external_model later. The task type of all external models within an endpoint must be the same.""" foundation_model: Optional[FoundationModel] = None - """The foundation model that is served. NOTE: Only one of foundation_model, external_model, and - (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) is - returned based on the endpoint type.""" + """All fields are not sensitive as they are hard-coded in the system and made available to + customers.""" instance_profile_arn: Optional[str] = None """ARN of the instance profile that the served entity uses to access AWS resources.""" @@ -2219,13 +2393,15 @@ class ServedEntityOutput: """The minimum tokens per second that the endpoint can scale down to.""" name: Optional[str] = None - """The name of the served entity.""" + """The name of a served entity. It must be unique across an endpoint. A served entity name can + consist of alphanumeric characters, dashes, and underscores. If not specified for an external + model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if + not specified for other entities, it defaults to entity_name-entity_version.""" scale_to_zero_enabled: Optional[bool] = None """Whether the compute resources for the served entity should scale down to zero.""" state: Optional[ServedModelState] = None - """Information corresponding to the state of the served entity.""" workload_size: Optional[str] = None """The workload size of the served entity. The workload size corresponds to a range of provisioned @@ -2233,15 +2409,15 @@ class ServedEntityOutput: process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). 
If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size - will be 0.""" + is 0.""" - workload_type: Optional[str] = None + workload_type: Optional[ServingModelWorkloadType] = None """The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types]. - [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" + [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" def as_dict(self) -> dict: """Serializes the ServedEntityOutput into a dictionary suitable for use as a JSON request body.""" @@ -2262,7 +2438,7 @@ def as_dict(self) -> dict: if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled if self.state: body['state'] = self.state.as_dict() if self.workload_size is not None: body['workload_size'] = self.workload_size - if self.workload_type is not None: body['workload_type'] = self.workload_type + if self.workload_type is not None: body['workload_type'] = self.workload_type.value return body def as_shallow_dict(self) -> dict: @@ -2304,31 +2480,22 @@ def from_dict(cls, d: Dict[str, any]) -> ServedEntityOutput: scale_to_zero_enabled=d.get('scale_to_zero_enabled', None), state=_from_dict(d, 'state', ServedModelState), workload_size=d.get('workload_size', None), - workload_type=d.get('workload_type', None)) + workload_type=_enum(d, 'workload_type', ServingModelWorkloadType)) @dataclass class ServedEntitySpec: entity_name: Optional[str] = None - """The name of the entity served. 
The entity may be a model in the Databricks Model Registry, a - model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC - object, the full name of the object is given in the form of - __catalog_name__.__schema_name__.__model_name__.""" entity_version: Optional[str] = None - """The version of the served entity in Databricks Model Registry or empty if the entity is a - FEATURE_SPEC.""" external_model: Optional[ExternalModel] = None - """The external model that is served. NOTE: Only one of external_model, foundation_model, and - (entity_name, entity_version) is returned based on the endpoint type.""" foundation_model: Optional[FoundationModel] = None - """The foundation model that is served. NOTE: Only one of foundation_model, external_model, and - (entity_name, entity_version) is returned based on the endpoint type.""" + """All fields are not sensitive as they are hard-coded in the system and made available to + customers.""" name: Optional[str] = None - """The name of the served entity.""" def as_dict(self) -> dict: """Serializes the ServedEntitySpec into a dictionary suitable for use as a JSON request body.""" @@ -2362,24 +2529,21 @@ def from_dict(cls, d: Dict[str, any]) -> ServedEntitySpec: @dataclass class ServedModelInput: + scale_to_zero_enabled: bool + """Whether the compute resources for the served entity should scale down to zero.""" + model_name: str - """The name of the model in Databricks Model Registry to be served or if the model resides in Unity - Catalog, the full name of model, in the form of __catalog_name__.__schema_name__.__model_name__.""" model_version: str - """The version of the model in Databricks Model Registry or Unity Catalog to be served.""" - - scale_to_zero_enabled: bool - """Whether the compute resources for the served model should scale down to zero.""" environment_vars: Optional[Dict[str, str]] = None """An object containing a set of optional, user-specified environment variable key-value pairs 
used - for serving this model. Note: this is an experimental feature and subject to change. Example - model environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": + for serving this entity. Note: this is an experimental feature and subject to change. Example + entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`""" instance_profile_arn: Optional[str] = None - """ARN of the instance profile that the served model will use to access AWS resources.""" + """ARN of the instance profile that the served entity uses to access AWS resources.""" max_provisioned_throughput: Optional[int] = None """The maximum tokens per second that the endpoint can scale up to.""" @@ -2388,25 +2552,26 @@ class ServedModelInput: """The minimum tokens per second that the endpoint can scale down to.""" name: Optional[str] = None - """The name of a served model. It must be unique across an endpoint. If not specified, this field - will default to -. A served model name can consist of alphanumeric - characters, dashes, and underscores.""" + """The name of a served entity. It must be unique across an endpoint. A served entity name can + consist of alphanumeric characters, dashes, and underscores. If not specified for an external + model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if + not specified for other entities, it defaults to entity_name-entity_version.""" workload_size: Optional[ServedModelInputWorkloadSize] = None - """The workload size of the served model. The workload size corresponds to a range of provisioned - concurrency that the compute will autoscale between. A single unit of provisioned concurrency - can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned - concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned - concurrency). 
If scale-to-zero is enabled, the lower bound of the provisioned concurrency for - each workload size will be 0.""" + """The workload size of the served entity. The workload size corresponds to a range of provisioned + concurrency that the compute autoscales between. A single unit of provisioned concurrency can + process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), + "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If + scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size + is 0.""" workload_type: Optional[ServedModelInputWorkloadType] = None - """The workload type of the served model. The workload type selects which type of compute to use in - the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU + """The workload type of the served entity. The workload type selects which type of compute to use + in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types]. - [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" + [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" def as_dict(self) -> dict: """Serializes the ServedModelInput into a dictionary suitable for use as a JSON request body.""" @@ -2458,12 +2623,6 @@ def from_dict(cls, d: Dict[str, any]) -> ServedModelInput: class ServedModelInputWorkloadSize(Enum): - """The workload size of the served model. The workload size corresponds to a range of provisioned - concurrency that the compute will autoscale between. A single unit of provisioned concurrency - can process one request at a time. 
Valid workload sizes are "Small" (4 - 4 provisioned - concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned - concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for - each workload size will be 0.""" LARGE = 'Large' MEDIUM = 'Medium' @@ -2471,12 +2630,6 @@ class ServedModelInputWorkloadSize(Enum): class ServedModelInputWorkloadType(Enum): - """The workload type of the served model. The workload type selects which type of compute to use in - the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU - acceleration is available by selecting workload types like GPU_SMALL and others. See the - available [GPU types]. - - [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" CPU = 'CPU' GPU_LARGE = 'GPU_LARGE' @@ -2488,51 +2641,48 @@ class ServedModelInputWorkloadType(Enum): @dataclass class ServedModelOutput: creation_timestamp: Optional[int] = None - """The creation timestamp of the served model in Unix time.""" creator: Optional[str] = None - """The email of the user who created the served model.""" environment_vars: Optional[Dict[str, str]] = None """An object containing a set of optional, user-specified environment variable key-value pairs used - for serving this model. Note: this is an experimental feature and subject to change. Example - model environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": + for serving this entity. Note: this is an experimental feature and subject to change. 
Example + entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`""" instance_profile_arn: Optional[str] = None - """ARN of the instance profile that the served model will use to access AWS resources.""" + """ARN of the instance profile that the served entity uses to access AWS resources.""" model_name: Optional[str] = None - """The name of the model in Databricks Model Registry or the full name of the model in Unity - Catalog.""" model_version: Optional[str] = None - """The version of the model in Databricks Model Registry or Unity Catalog to be served.""" name: Optional[str] = None - """The name of the served model.""" + """The name of a served entity. It must be unique across an endpoint. A served entity name can + consist of alphanumeric characters, dashes, and underscores. If not specified for an external + model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if + not specified for other entities, it defaults to entity_name-entity_version.""" scale_to_zero_enabled: Optional[bool] = None - """Whether the compute resources for the Served Model should scale down to zero.""" + """Whether the compute resources for the served entity should scale down to zero.""" state: Optional[ServedModelState] = None - """Information corresponding to the state of the Served Model.""" workload_size: Optional[str] = None - """The workload size of the served model. The workload size corresponds to a range of provisioned - concurrency that the compute will autoscale between. A single unit of provisioned concurrency - can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned - concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned - concurrency). 
If scale-to-zero is enabled, the lower bound of the provisioned concurrency for - each workload size will be 0.""" - - workload_type: Optional[str] = None - """The workload type of the served model. The workload type selects which type of compute to use in - the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU + """The workload size of the served entity. The workload size corresponds to a range of provisioned + concurrency that the compute autoscales between. A single unit of provisioned concurrency can + process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), + "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If + scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size + is 0.""" + + workload_type: Optional[ServingModelWorkloadType] = None + """The workload type of the served entity. The workload type selects which type of compute to use + in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types]. 
- [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" + [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types""" def as_dict(self) -> dict: """Serializes the ServedModelOutput into a dictionary suitable for use as a JSON request body.""" @@ -2547,7 +2697,7 @@ def as_dict(self) -> dict: if self.scale_to_zero_enabled is not None: body['scale_to_zero_enabled'] = self.scale_to_zero_enabled if self.state: body['state'] = self.state.as_dict() if self.workload_size is not None: body['workload_size'] = self.workload_size - if self.workload_type is not None: body['workload_type'] = self.workload_type + if self.workload_type is not None: body['workload_type'] = self.workload_type.value return body def as_shallow_dict(self) -> dict: @@ -2579,20 +2729,18 @@ def from_dict(cls, d: Dict[str, any]) -> ServedModelOutput: scale_to_zero_enabled=d.get('scale_to_zero_enabled', None), state=_from_dict(d, 'state', ServedModelState), workload_size=d.get('workload_size', None), - workload_type=d.get('workload_type', None)) + workload_type=_enum(d, 'workload_type', ServingModelWorkloadType)) @dataclass class ServedModelSpec: model_name: Optional[str] = None - """The name of the model in Databricks Model Registry or the full name of the model in Unity - Catalog.""" + """Only one of model_name and entity_name should be populated""" model_version: Optional[str] = None - """The version of the model in Databricks Model Registry or Unity Catalog to be served.""" + """Only one of model_version and entity_version should be populated""" name: Optional[str] = None - """The name of the served model.""" def as_dict(self) -> dict: """Serializes the ServedModelSpec into a dictionary suitable for use as a JSON request body.""" @@ -2621,18 +2769,8 @@ def from_dict(cls, d: Dict[str, any]) -> ServedModelSpec: @dataclass class ServedModelState: deployment: 
Optional[ServedModelStateDeployment] = None - """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity - is not ready yet because the deployment is still being created (i.e container image is building, - model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the - served entity was previously in a ready state but no longer is and is attempting to recover. - DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED - indicates that there was an error trying to bring up the served entity (e.g container image - build failed, the model server failed to start due to a model loading error, etc.) - DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in - bringing up another served entity under the same endpoint and config version.""" deployment_state_message: Optional[str] = None - """More information about the state of the served entity, if available.""" def as_dict(self) -> dict: """Serializes the ServedModelState into a dictionary suitable for use as a JSON request body.""" @@ -2658,15 +2796,6 @@ def from_dict(cls, d: Dict[str, any]) -> ServedModelState: class ServedModelStateDeployment(Enum): - """The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity - is not ready yet because the deployment is still being created (i.e container image is building, - model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the - served entity was previously in a ready state but no longer is and is attempting to recover. - DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED - indicates that there was an error trying to bring up the served entity (e.g container image - build failed, the model server failed to start due to a model loading error, etc.) 
- DEPLOYMENT_ABORTED indicates that the deployment was terminated likely due to a failure in - bringing up another served entity under the same endpoint and config version.""" ABORTED = 'DEPLOYMENT_ABORTED' CREATING = 'DEPLOYMENT_CREATING' @@ -2701,8 +2830,8 @@ def from_dict(cls, d: Dict[str, any]) -> ServerLogsResponse: @dataclass class ServingEndpoint: ai_gateway: Optional[AiGatewayConfig] = None - """The AI Gateway configuration for the serving endpoint. NOTE: Only external model endpoints are - currently supported.""" + """The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned + throughput endpoints are currently supported.""" config: Optional[EndpointCoreConfigSummary] = None """The config that is currently being served by the endpoint.""" @@ -2714,8 +2843,7 @@ class ServingEndpoint: """The email of the user who created the serving endpoint.""" id: Optional[str] = None - """System-generated ID of the endpoint. This is used to refer to the endpoint in the Permissions - API""" + """System-generated ID of the endpoint, included to be used by the Permissions API.""" last_updated_timestamp: Optional[int] = None """The timestamp when the endpoint was last updated by a user in Unix time.""" @@ -2874,8 +3002,8 @@ def from_dict(cls, d: Dict[str, any]) -> ServingEndpointAccessControlResponse: @dataclass class ServingEndpointDetailed: ai_gateway: Optional[AiGatewayConfig] = None - """The AI Gateway configuration for the serving endpoint. NOTE: Only external model endpoints are - currently supported.""" + """The AI Gateway configuration for the serving endpoint. 
NOTE: Only external model and provisioned + throughput endpoints are currently supported.""" config: Optional[EndpointCoreConfigOutput] = None """The config that is currently being served by the endpoint.""" @@ -2983,7 +3111,6 @@ def from_dict(cls, d: Dict[str, any]) -> ServingEndpointDetailed: class ServingEndpointDetailedPermissionLevel(Enum): - """The permission level of the principal making the request.""" CAN_MANAGE = 'CAN_MANAGE' CAN_QUERY = 'CAN_QUERY' @@ -3123,6 +3250,15 @@ def from_dict(cls, d: Dict[str, any]) -> ServingEndpointPermissionsRequest: serving_endpoint_id=d.get('serving_endpoint_id', None)) +class ServingModelWorkloadType(Enum): + + CPU = 'CPU' + GPU_LARGE = 'GPU_LARGE' + GPU_MEDIUM = 'GPU_MEDIUM' + GPU_SMALL = 'GPU_SMALL' + MULTIGPU_MEDIUM = 'MULTIGPU_MEDIUM' + + @dataclass class TrafficConfig: routes: Optional[List[Route]] = None @@ -3276,8 +3412,8 @@ def create(self, :param config: :class:`EndpointCoreConfigInput` The core config of the serving endpoint. :param ai_gateway: :class:`AiGatewayConfig` (optional) - The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are - supported as of now. + The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned + throughput endpoints are currently supported. :param rate_limits: List[:class:`RateLimit`] (optional) Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. @@ -3325,7 +3461,6 @@ def delete(self, name: str): """Delete a serving endpoint. :param name: str - The name of the serving endpoint. This field is required. 
""" @@ -3367,7 +3502,7 @@ def get(self, name: str) -> ServingEndpointDetailed: res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}', headers=headers) return ServingEndpointDetailed.from_dict(res) - def get_open_api(self, name: str): + def get_open_api(self, name: str) -> GetOpenApiResponse: """Get the schema for a serving endpoint. Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for @@ -3376,12 +3511,13 @@ def get_open_api(self, name: str): :param name: str The name of the serving endpoint that the served model belongs to. This field is required. - + :returns: :class:`GetOpenApiResponse` """ - headers = {'Accept': 'application/json', } + headers = {'Accept': 'text/plain', } - self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers) + res = self._api.do('GET', f'/api/2.0/serving-endpoints/{name}/openapi', headers=headers, raw=True) + return GetOpenApiResponse.from_dict(res) def get_permission_levels(self, serving_endpoint_id: str) -> GetServingEndpointPermissionLevelsResponse: """Get serving endpoint permission levels. @@ -3420,6 +3556,44 @@ def get_permissions(self, serving_endpoint_id: str) -> ServingEndpointPermission headers=headers) return ServingEndpointPermissions.from_dict(res) + def http_request(self, + connection_name: str, + method: ExternalFunctionRequestHttpMethod, + path: str, + *, + headers: Optional[str] = None, + json: Optional[str] = None, + params: Optional[str] = None) -> ExternalFunctionResponse: + """Make external services call using the credentials stored in UC Connection. + + :param connection_name: str + The connection name to use. This is required to identify the external connection. + :param method: :class:`ExternalFunctionRequestHttpMethod` + The HTTP method to use (e.g., 'GET', 'POST'). + :param path: str + The relative path for the API endpoint. This is required. + :param headers: str (optional) + Additional headers for the request. 
If not provided, only auth headers from connections would be + passed. + :param json: str (optional) + The JSON payload to send in the request body. + :param params: str (optional) + Query parameters for the request. + + :returns: :class:`ExternalFunctionResponse` + """ + body = {} + if connection_name is not None: body['connection_name'] = connection_name + if headers is not None: body['headers'] = headers + if json is not None: body['json'] = json + if method is not None: body['method'] = method.value + if params is not None: body['params'] = params + if path is not None: body['path'] = path + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + + res = self._api.do('POST', '/api/2.0/external-function', body=body, headers=headers) + return ExternalFunctionResponse.from_dict(res) + def list(self) -> Iterator[ServingEndpoint]: """Get all serving endpoints. @@ -3456,7 +3630,7 @@ def patch(self, name: str, *, add_tags: Optional[List[EndpointTag]] = None, - delete_tags: Optional[List[str]] = None) -> Iterator[EndpointTag]: + delete_tags: Optional[List[str]] = None) -> EndpointTags: """Update tags of a serving endpoint. Used to batch add and delete tags from a serving endpoint with a single API call. @@ -3468,7 +3642,7 @@ def patch(self, :param delete_tags: List[str] (optional) List of tag keys to delete - :returns: Iterator over :class:`EndpointTag` + :returns: :class:`EndpointTags` """ body = {} if add_tags is not None: body['add_tags'] = [v.as_dict() for v in add_tags] @@ -3476,7 +3650,7 @@ def patch(self, headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } res = self._api.do('PATCH', f'/api/2.0/serving-endpoints/{name}/tags', body=body, headers=headers) - return [EndpointTag.from_dict(v) for v in res] + return EndpointTags.from_dict(res) def put(self, name: str, *, rate_limits: Optional[List[RateLimit]] = None) -> PutResponse: """Update rate limits of a serving endpoint. 
@@ -3511,8 +3685,8 @@ def put_ai_gateway( usage_tracking_config: Optional[AiGatewayUsageTrackingConfig] = None) -> PutAiGatewayResponse: """Update AI Gateway of a serving endpoint. - Used to update the AI Gateway of a serving endpoint. NOTE: Only external model endpoints are currently - supported. + Used to update the AI Gateway of a serving endpoint. NOTE: Only external model and provisioned + throughput endpoints are currently supported. :param name: str The name of the serving endpoint whose AI Gateway is being updated. This field is required. @@ -3672,14 +3846,16 @@ def update_config(self, The name of the serving endpoint to update. This field is required. :param auto_capture_config: :class:`AutoCaptureConfigInput` (optional) Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. + Note: this field is deprecated for creating new provisioned throughput endpoints, or updating + existing provisioned throughput endpoints that never have inference table configured; in these cases + please use AI Gateway to manage inference tables. :param served_entities: List[:class:`ServedEntityInput`] (optional) - A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served - entities. + The list of served entities under the serving endpoint config. :param served_models: List[:class:`ServedModelInput`] (optional) - (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A - serving endpoint can have up to 15 served models. + (Deprecated, use served_entities instead) The list of served models under the serving endpoint + config. :param traffic_config: :class:`TrafficConfig` (optional) - The traffic config defining how invocations to the serving endpoint should be routed. + The traffic configuration associated with the serving endpoint config. :returns: Long-running operation waiter for :class:`ServingEndpointDetailed`.