""" Pydantic models for request/response validation """ from typing import Optional, Dict from pydantic import BaseModel, Field, validator class TranslationRequest(BaseModel): """ Translation request model Validates input for the translation endpoint with proper FLORES-200 language codes. """ text: str = Field( ..., example="Habari ya asubuhi", description="Text to translate (1-5000 characters)", min_length=1, max_length=5000, title="Input Text" ) target_language: str = Field( ..., example="eng_Latn", description="Target language in FLORES-200 format (e.g., eng_Latn for English)", pattern=r"^[a-z]{3}_[A-Z][a-z]{3}$", title="Target Language Code" ) source_language: Optional[str] = Field( None, example="swh_Latn", description="Source language in FLORES-200 format. If not provided, language will be auto-detected", pattern=r"^[a-z]{3}_[A-Z][a-z]{3}$", title="Source Language Code (Optional)" ) class Config: json_schema_extra = { "examples": [ { "summary": "Auto-detect source language", "description": "Translate Swahili to English with automatic language detection", "value": { "text": "Habari ya asubuhi", "target_language": "eng_Latn" } }, { "summary": "Specify source language", "description": "Translate English to Swahili with specified source language", "value": { "text": "Good morning", "source_language": "eng_Latn", "target_language": "swh_Latn" } }, { "summary": "African language translation", "description": "Translate Kikuyu to English", "value": { "text": "Wĩ mwega?", "source_language": "kik_Latn", "target_language": "eng_Latn" } } ] } @validator('text') def validate_text(cls, v): if not v.strip(): raise ValueError('Text cannot be empty or only whitespace') return v.strip() class TranslationResponse(BaseModel): """ Translation response model Contains the translated text and metadata about the translation process. """ translated_text: str = Field( ..., description="The translated text result", example="Good morning", title="Translated Text" ) source_language: str = Field( ..., description="Detected or provided source language code", example="swh_Latn", title="Source Language" ) target_language: str = Field( ..., description="Target language code as requested", example="eng_Latn", title="Target Language" ) inference_time: float = Field( ..., description="Time taken for translation in seconds", example=0.234, ge=0, title="Inference Time (seconds)" ) character_count: int = Field( ..., description="Number of characters in the input text", example=17, ge=1, title="Character Count" ) timestamp: str = Field( ..., description="Timestamp of the translation in Nairobi timezone", example="Monday | 2024-06-21 | 14:30:25", title="Timestamp" ) request_id: str = Field( ..., description="Unique request identifier for debugging and tracking", example="550e8400-e29b-41d4-a716-446655440000", title="Request ID" ) total_time: float = Field( ..., description="Total request processing time in seconds", example=1.234, ge=0, title="Total Processing Time (seconds)" ) class Config: json_schema_extra = { "example": { "translated_text": "Good morning", "source_language": "swh_Latn", "target_language": "eng_Latn", "inference_time": 0.234, "character_count": 17, "timestamp": "Monday | 2024-06-21 | 14:30:25", "request_id": "550e8400-e29b-41d4-a716-446655440000", "total_time": 1.234 } } class HealthResponse(BaseModel): """Response model for health check endpoints""" status: str = Field(..., description="API health status") version: str = Field(..., description="API version") models_loaded: bool = Field(..., description="Whether models are loaded") uptime: float = Field(..., description="API uptime in seconds") timestamp: str = Field(..., description="Current timestamp") class LanguageDetectionRequest(BaseModel): """ Language detection request model For detecting the language of input text. """ text: str = Field( ..., example="Habari ya asubuhi", description="Text to detect language for (1-1000 characters)", min_length=1, max_length=1000, title="Input Text" ) class Config: json_schema_extra = { "examples": [ { "summary": "Swahili text detection", "description": "Detect language for Swahili greeting", "value": { "text": "Habari ya asubuhi" } }, { "summary": "English text detection", "description": "Detect language for English text", "value": { "text": "Good morning, how are you?" } }, { "summary": "French text detection", "description": "Detect language for French text", "value": { "text": "Bonjour, comment allez-vous?" } } ] } class LanguageDetectionResponse(BaseModel): """ Language detection response model Contains detected language information and confidence. """ detected_language: str = Field( ..., description="Detected language code in FLORES-200 format", example="swh_Latn", title="Detected Language Code" ) language_name: str = Field( ..., description="Human-readable name of detected language", example="Swahili", title="Language Name" ) native_name: str = Field( ..., description="Native name of detected language", example="Kiswahili", title="Native Language Name" ) confidence: float = Field( ..., description="Detection confidence score (0.0 to 1.0)", example=0.9876, ge=0.0, le=1.1, # Allow slightly above 1.0 for FastText edge cases title="Confidence Score" ) is_english: bool = Field( ..., description="Whether the detected language is English", example=False, title="Is English" ) character_count: int = Field( ..., description="Number of characters in input text", example=17, ge=1, title="Character Count" ) timestamp: str = Field( ..., description="Detection timestamp in Nairobi timezone", example="Monday | 2024-06-21 | 14:30:25", title="Timestamp" ) request_id: str = Field( ..., description="Unique request identifier for debugging", example="550e8400-e29b-41d4-a716-446655440000", title="Request ID" ) total_time: float = Field( ..., description="Total request processing time in seconds", example=0.045, ge=0, title="Total Processing Time (seconds)" ) class Config: json_schema_extra = { "example": { "detected_language": "swh_Latn", "language_name": "Swahili", "native_name": "Kiswahili", "confidence": 0.9876, "is_english": False, "character_count": 17, "timestamp": "Monday | 2024-06-21 | 14:30:25", "request_id": "550e8400-e29b-41d4-a716-446655440000", "total_time": 0.045 } } class ErrorResponse(BaseModel): """Response model for error responses""" error: str = Field(..., description="Error type") message: str = Field(..., description="Error message") request_id: str = Field(..., description="Request identifier") timestamp: str = Field(..., description="Error timestamp") class LanguageInfo(BaseModel): """ Language information model Contains metadata about a supported language. """ name: str = Field(..., description="English name of the language", example="Swahili") native_name: str = Field(..., description="Native name of the language", example="Kiswahili") region: str = Field(..., description="Geographic region", example="Africa") script: str = Field(..., description="Writing script", example="Latin") class LanguagesResponse(BaseModel): """ Languages list response model Contains a dictionary of supported languages with their metadata. """ languages: Dict[str, LanguageInfo] = Field(..., description="Dictionary of language codes to language info") total_count: int = Field(..., description="Total number of languages") class Config: json_schema_extra = { "example": { "languages": { "swh_Latn": { "name": "Swahili", "native_name": "Kiswahili", "region": "Africa", "script": "Latin" }, "eng_Latn": { "name": "English", "native_name": "English", "region": "Europe", "script": "Latin" } }, "total_count": 2 } } class LanguageStatsResponse(BaseModel): """ Language statistics response model Contains statistics about supported languages. """ total_languages: int = Field(..., description="Total number of supported languages") regions: int = Field(..., description="Number of geographic regions covered") scripts: int = Field(..., description="Number of writing scripts supported") by_region: Dict[str, int] = Field(..., description="Language count by region") class Config: json_schema_extra = { "example": { "total_languages": 200, "regions": 6, "scripts": 15, "by_region": { "Africa": 25, "Europe": 40, "Asia": 80, "Middle East": 15, "Americas": 30, "Oceania": 10 } } }