# Source code for src.models.engine

"""Data models for inference engine services."""
from __future__ import annotations

from datetime import datetime
from enum import Enum
from typing import Dict, Optional

from bson import ObjectId
from pydantic import BaseModel, Field, PositiveInt, constr, validator

from ..internal.utils import sanitize_for_url, to_camel_case
from .common import PyObjectId

# NOTE: disabled ability to set resource limits
# TODO: Improve implementation of resource limits
# class ResourceLimits(BaseModel):
#     cpu_cores: float = Field(
#         default=1, gt=0, lt=16, description="CPU cores (0.5, 1, 2, 4, 8, 16)"
#     )
#     memory_gb: int = Field(
#         default=2,
#         gt=0,
#         lt=32,
#         description="Memory in GB (1, 2, 4, 8, 16, 32)",
#     )

# Pattern used to validate container image URIs.
# Written as raw strings so that ``\/`` and ``\.`` reach the regex engine
# untouched instead of being invalid string escape sequences (which emit a
# DeprecationWarning / SyntaxWarning at compile time).
# NOTE(review): every repository character class uses ``*``, so the empty
# string also matches this pattern - confirm whether that is intended.
IMAGE_URI_REGEX = (
    # Optional registry host (dot-separated labels) with optional port,
    # terminated by "/".
    r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"
    r"(?:\.(?!-)[a-zA-Z0-9-]{1,63}(?<!-))*(?::[0-9]{1,5})?/)?"
    # Lower-case repository path made of "/"-separated components.
    r"((?![._-])(?:[a-z0-9._-]*)(?<![._-])"
    r"(?:/(?![._-])[a-z0-9._-]*(?<![._-]))*)"
    # Optional ":tag" suffix.
    r"(?::(?![.-])[a-zA-Z0-9_.-]{1,128})?$"
)

# Constrained string type: values must match IMAGE_URI_REGEX.
ContainerURI = constr(regex=IMAGE_URI_REGEX)


class ServiceBackend(str, Enum):
    """Available service backends.

    Members are also ``str`` instances, so they compare equal to their
    raw string values.
    """

    # knative-serving
    KNATIVE = "knative"
    # emissary-ingress
    EMISSARY = "emissary"
class K8SPhase(str, Enum):
    """K8S phase names.

    Values keep the capitalised spelling (e.g. ``"Pending"``), and each
    member is also a ``str`` instance.
    """

    PENDING = "Pending"
    RUNNING = "Running"
    SUCCEEDED = "Succeeded"
    FAILED = "Failed"
    UNKNOWN = "Unknown"
class InferenceServiceStatus(BaseModel):
    """Status information for an inference service.

    Only ``service_name`` is required; every other field has a default.
    """

    service_name: str
    # Defaults to UNKNOWN when no phase is supplied.
    status: K8SPhase = K8SPhase.UNKNOWN
    message: str = ""
    ready: bool = True
    schedulable: bool = True
    # Must be zero or greater.
    expected_replicas: int = Field(default=1, ge=0)

    class Config:
        """Pydantic config.

        Generates camelCase aliases for every field while still allowing
        population by the snake_case field names.
        """

        alias_generator = to_camel_case
        allow_population_by_field_name = True
class CreateInferenceEngineService(BaseModel):
    """Request model for creating an inference engine service."""

    # NOTE: actually model title, will convert to model id in backend
    model_id: str
    image_uri: ContainerURI
    # resource_limits: ResourceLimits
    container_port: Optional[PositiveInt] = None
    env: Optional[Dict[str, str]] = None
    # float to allow for fractional GPUs
    num_gpus: float = Field(default=0, ge=0, le=2)

    @validator("model_id")
    def sanitize_model_name(cls, v: str) -> str:
        """Make the supplied ``model_id`` value safe for use in a URL.

        Args:
            v (str): The model name as provided in the request.

        Returns:
            str: The value returned by ``sanitize_for_url`` for ``v``.
        """
        url_safe_id = sanitize_for_url(v)
        return url_safe_id

    class Config:
        """Pydantic config.

        Generates camelCase aliases so the model can be created from a
        JSON object with camelCase keys.
        """

        alias_generator = to_camel_case
class InferenceEngineService(CreateInferenceEngineService):
    """Data model for inference engine service in database.

    Extends the creation request model with the fields listed below.
    """

    # Aliased to "_id"; a new PyObjectId is generated when none is given.
    id: PyObjectId = Field(default_factory=PyObjectId, alias="_id")
    inference_url: str
    owner_id: str
    service_name: str
    created: datetime
    last_modified: datetime
    host: str
    path: str
    protocol: str = Field(default="http")
    backend: ServiceBackend

    class Config:
        """Pydantic config.

        Generates camelCase aliases (population by field name remains
        allowed), permits arbitrary field types and encodes ``ObjectId``
        values as ``str`` when producing JSON.
        """

        json_encoders = {ObjectId: str}
        arbitrary_types_allowed = True
        allow_population_by_field_name = True
        alias_generator = to_camel_case
class UpdateInferenceEngineService(BaseModel):
    """Request model for updating an inference engine service."""

    image_uri: ContainerURI
    container_port: Optional[PositiveInt] = None
    # resource_limits: ResourceLimits
    # Typed as str -> str so updates are validated the same way as the
    # ``env`` field of CreateInferenceEngineService (previously a bare
    # ``dict`` that accepted arbitrary value types).
    env: Optional[Dict[str, str]] = None
    # float to allow for fractional GPUs
    num_gpus: float = Field(default=0, ge=0, le=2)

    class Config:
        """Pydantic config to allow creation of data model
        from a JSON object with camelCase keys.
        """

        alias_generator = to_camel_case
        arbitrary_types_allowed = True
        json_encoders = {ObjectId: str}