|
""" |
|
Language support service - provides information about supported languages |
|
""" |
|
|
|
from collections import Counter
from typing import Any, Dict, List, Optional

from ..core.logging import get_logger
|
|
|
# Module-level logger obtained from the project's logging helper.
logger = get_logger()
|
|
|
|
|
# Registry of supported languages, keyed by "<language>_<script>" code.
# Every entry carries the same four string fields: "name", "native_name",
# "region" and "script".
#
# Keys in a dict literal must be unique: a repeated key silently overwrites
# the earlier value. Each language therefore appears exactly once and belongs
# to a single region; English, French, Spanish and Portuguese are listed under
# "Europe" only.
SUPPORTED_LANGUAGES: Dict[str, Dict[str, str]] = {
    # African languages
    "afr_Latn": {"name": "Afrikaans", "native_name": "Afrikaans", "region": "Africa", "script": "Latin"},
    "aka_Latn": {"name": "Akan", "native_name": "Akan", "region": "Africa", "script": "Latin"},
    "amh_Ethi": {"name": "Amharic", "native_name": "አማርኛ", "region": "Africa", "script": "Ethiopic"},
    "bam_Latn": {"name": "Bambara", "native_name": "Bamanankan", "region": "Africa", "script": "Latin"},
    "bem_Latn": {"name": "Bemba", "native_name": "Ichibemba", "region": "Africa", "script": "Latin"},
    "dik_Latn": {"name": "Dinka", "native_name": "Thuɔŋjäŋ", "region": "Africa", "script": "Latin"},
    "dyu_Latn": {"name": "Dyula", "native_name": "Jula", "region": "Africa", "script": "Latin"},
    "ewe_Latn": {"name": "Ewe", "native_name": "Eʋegbe", "region": "Africa", "script": "Latin"},
    "fon_Latn": {"name": "Fon", "native_name": "Fɔngbe", "region": "Africa", "script": "Latin"},
    "fuv_Latn": {"name": "Nigerian Fulfulde", "native_name": "Fulfulde", "region": "Africa", "script": "Latin"},
    "gaz_Latn": {"name": "West Central Oromo", "native_name": "Oromoo", "region": "Africa", "script": "Latin"},
    "hau_Latn": {"name": "Hausa", "native_name": "Harshen Hausa", "region": "Africa", "script": "Latin"},
    "ibo_Latn": {"name": "Igbo", "native_name": "Asụsụ Igbo", "region": "Africa", "script": "Latin"},
    "kab_Latn": {"name": "Kabyle", "native_name": "Taqbaylit", "region": "Africa", "script": "Latin"},
    "kam_Latn": {"name": "Kamba", "native_name": "Kikamba", "region": "Africa", "script": "Latin"},
    "kbp_Latn": {"name": "Kabiyè", "native_name": "Kabɩyɛ", "region": "Africa", "script": "Latin"},
    "kea_Latn": {"name": "Kabuverdianu", "native_name": "Kabuverdianu", "region": "Africa", "script": "Latin"},
    "kik_Latn": {"name": "Kikuyu", "native_name": "Gĩkũyũ", "region": "Africa", "script": "Latin"},
    "kin_Latn": {"name": "Kinyarwanda", "native_name": "Ikinyarwanda", "region": "Africa", "script": "Latin"},
    "kmb_Latn": {"name": "Kimbundu", "native_name": "Kimbundu", "region": "Africa", "script": "Latin"},
    "knc_Arab": {"name": "Central Kanuri", "native_name": "Kanuri", "region": "Africa", "script": "Arabic"},
    "knc_Latn": {"name": "Central Kanuri", "native_name": "Kanuri", "region": "Africa", "script": "Latin"},
    "kon_Latn": {"name": "Kikongo", "native_name": "Kikongo", "region": "Africa", "script": "Latin"},
    "lin_Latn": {"name": "Lingala", "native_name": "Lingála", "region": "Africa", "script": "Latin"},
    "lua_Latn": {"name": "Luba-Lulua", "native_name": "Tshiluba", "region": "Africa", "script": "Latin"},
    "lug_Latn": {"name": "Luganda", "native_name": "Luganda", "region": "Africa", "script": "Latin"},
    "luo_Latn": {"name": "Luo", "native_name": "Dholuo", "region": "Africa", "script": "Latin"},
    "mos_Latn": {"name": "Mossi", "native_name": "Mooré", "region": "Africa", "script": "Latin"},
    "nso_Latn": {"name": "Northern Sotho", "native_name": "Sesotho sa Leboa", "region": "Africa", "script": "Latin"},
    "nus_Latn": {"name": "Nuer", "native_name": "Thok Nath", "region": "Africa", "script": "Latin"},
    "nya_Latn": {"name": "Nyanja", "native_name": "Chinyanja", "region": "Africa", "script": "Latin"},
    "orm_Latn": {"name": "Oromo", "native_name": "Afaan Oromoo", "region": "Africa", "script": "Latin"},
    "run_Latn": {"name": "Rundi", "native_name": "Ikirundi", "region": "Africa", "script": "Latin"},
    "sag_Latn": {"name": "Sango", "native_name": "Sängö", "region": "Africa", "script": "Latin"},
    "sna_Latn": {"name": "Shona", "native_name": "ChiShona", "region": "Africa", "script": "Latin"},
    "som_Latn": {"name": "Somali", "native_name": "Soomaali", "region": "Africa", "script": "Latin"},
    "sot_Latn": {"name": "Southern Sotho", "native_name": "Sesotho", "region": "Africa", "script": "Latin"},
    "ssw_Latn": {"name": "Swati", "native_name": "SiSwati", "region": "Africa", "script": "Latin"},
    "swh_Latn": {"name": "Swahili", "native_name": "Kiswahili", "region": "Africa", "script": "Latin"},
    "taq_Latn": {"name": "Tamasheq", "native_name": "Tamasheq", "region": "Africa", "script": "Latin"},
    "taq_Tfng": {"name": "Tamasheq", "native_name": "ⵜⴰⵎⴰⵌⴰⵖ", "region": "Africa", "script": "Tifinagh"},
    "tir_Ethi": {"name": "Tigrinya", "native_name": "ትግርኛ", "region": "Africa", "script": "Ethiopic"},
    "tsn_Latn": {"name": "Tswana", "native_name": "Setswana", "region": "Africa", "script": "Latin"},
    "tso_Latn": {"name": "Tsonga", "native_name": "Xitsonga", "region": "Africa", "script": "Latin"},
    "tum_Latn": {"name": "Tumbuka", "native_name": "Chitumbuka", "region": "Africa", "script": "Latin"},
    "twi_Latn": {"name": "Twi", "native_name": "Twi", "region": "Africa", "script": "Latin"},
    "tzm_Tfng": {"name": "Central Atlas Tamazight", "native_name": "ⵜⴰⵎⴰⵣⵉⵖⵜ", "region": "Africa", "script": "Tifinagh"},
    "umb_Latn": {"name": "Umbundu", "native_name": "Umbundu", "region": "Africa", "script": "Latin"},
    "wol_Latn": {"name": "Wolof", "native_name": "Wolof", "region": "Africa", "script": "Latin"},
    "xho_Latn": {"name": "Xhosa", "native_name": "isiXhosa", "region": "Africa", "script": "Latin"},
    "yor_Latn": {"name": "Yoruba", "native_name": "Yorùbá", "region": "Africa", "script": "Latin"},
    "zul_Latn": {"name": "Zulu", "native_name": "isiZulu", "region": "Africa", "script": "Latin"},

    # European languages
    "eng_Latn": {"name": "English", "native_name": "English", "region": "Europe", "script": "Latin"},
    "fra_Latn": {"name": "French", "native_name": "Français", "region": "Europe", "script": "Latin"},
    "deu_Latn": {"name": "German", "native_name": "Deutsch", "region": "Europe", "script": "Latin"},
    "spa_Latn": {"name": "Spanish", "native_name": "Español", "region": "Europe", "script": "Latin"},
    "ita_Latn": {"name": "Italian", "native_name": "Italiano", "region": "Europe", "script": "Latin"},
    "por_Latn": {"name": "Portuguese", "native_name": "Português", "region": "Europe", "script": "Latin"},
    "rus_Cyrl": {"name": "Russian", "native_name": "Русский", "region": "Europe", "script": "Cyrillic"},
    "nld_Latn": {"name": "Dutch", "native_name": "Nederlands", "region": "Europe", "script": "Latin"},
    "pol_Latn": {"name": "Polish", "native_name": "Polski", "region": "Europe", "script": "Latin"},
    "ces_Latn": {"name": "Czech", "native_name": "Čeština", "region": "Europe", "script": "Latin"},
    "hun_Latn": {"name": "Hungarian", "native_name": "Magyar", "region": "Europe", "script": "Latin"},
    "ron_Latn": {"name": "Romanian", "native_name": "Română", "region": "Europe", "script": "Latin"},
    "bul_Cyrl": {"name": "Bulgarian", "native_name": "Български", "region": "Europe", "script": "Cyrillic"},
    "hrv_Latn": {"name": "Croatian", "native_name": "Hrvatski", "region": "Europe", "script": "Latin"},
    "srp_Cyrl": {"name": "Serbian", "native_name": "Српски", "region": "Europe", "script": "Cyrillic"},
    "slk_Latn": {"name": "Slovak", "native_name": "Slovenčina", "region": "Europe", "script": "Latin"},
    "slv_Latn": {"name": "Slovenian", "native_name": "Slovenščina", "region": "Europe", "script": "Latin"},
    "est_Latn": {"name": "Estonian", "native_name": "Eesti", "region": "Europe", "script": "Latin"},
    "lav_Latn": {"name": "Latvian", "native_name": "Latviešu", "region": "Europe", "script": "Latin"},
    "lit_Latn": {"name": "Lithuanian", "native_name": "Lietuvių", "region": "Europe", "script": "Latin"},

    # Asian languages
    "cmn_Hans": {"name": "Chinese (Simplified)", "native_name": "中文 (简体)", "region": "Asia", "script": "Han"},
    "cmn_Hant": {"name": "Chinese (Traditional)", "native_name": "中文 (繁體)", "region": "Asia", "script": "Han"},
    "jpn_Jpan": {"name": "Japanese", "native_name": "日本語", "region": "Asia", "script": "Japanese"},
    "kor_Hang": {"name": "Korean", "native_name": "한국어", "region": "Asia", "script": "Hangul"},
    "hin_Deva": {"name": "Hindi", "native_name": "हिन्दी", "region": "Asia", "script": "Devanagari"},
    "ben_Beng": {"name": "Bengali", "native_name": "বাংলা", "region": "Asia", "script": "Bengali"},
    "urd_Arab": {"name": "Urdu", "native_name": "اردو", "region": "Asia", "script": "Arabic"},
    "tam_Taml": {"name": "Tamil", "native_name": "தமிழ்", "region": "Asia", "script": "Tamil"},
    "tel_Telu": {"name": "Telugu", "native_name": "తెలుగు", "region": "Asia", "script": "Telugu"},
    "mar_Deva": {"name": "Marathi", "native_name": "मराठी", "region": "Asia", "script": "Devanagari"},
    "guj_Gujr": {"name": "Gujarati", "native_name": "ગુજરાતી", "region": "Asia", "script": "Gujarati"},
    "kan_Knda": {"name": "Kannada", "native_name": "ಕನ್ನಡ", "region": "Asia", "script": "Kannada"},
    "mal_Mlym": {"name": "Malayalam", "native_name": "മലയാളം", "region": "Asia", "script": "Malayalam"},
    "ori_Orya": {"name": "Odia", "native_name": "ଓଡ଼ିଆ", "region": "Asia", "script": "Odia"},
    "pan_Guru": {"name": "Punjabi", "native_name": "ਪੰਜਾਬੀ", "region": "Asia", "script": "Gurmukhi"},
    "tha_Thai": {"name": "Thai", "native_name": "ไทย", "region": "Asia", "script": "Thai"},
    "vie_Latn": {"name": "Vietnamese", "native_name": "Tiếng Việt", "region": "Asia", "script": "Latin"},
    "ind_Latn": {"name": "Indonesian", "native_name": "Bahasa Indonesia", "region": "Asia", "script": "Latin"},
    "msa_Latn": {"name": "Malay", "native_name": "Bahasa Melayu", "region": "Asia", "script": "Latin"},
    "tgl_Latn": {"name": "Tagalog", "native_name": "Tagalog", "region": "Asia", "script": "Latin"},
    # Mizo is spoken in north-eastern India, so it belongs with the Asian
    # languages rather than the African ones.
    "lus_Latn": {"name": "Mizo", "native_name": "Mizo ṭawng", "region": "Asia", "script": "Latin"},

    # Middle Eastern languages
    "ara_Arab": {"name": "Arabic", "native_name": "العربية", "region": "Middle East", "script": "Arabic"},
    "heb_Hebr": {"name": "Hebrew", "native_name": "עברית", "region": "Middle East", "script": "Hebrew"},
    "fas_Arab": {"name": "Persian", "native_name": "فارسی", "region": "Middle East", "script": "Arabic"},
    "tur_Latn": {"name": "Turkish", "native_name": "Türkçe", "region": "Middle East", "script": "Latin"},
}
|
|
|
|
|
def get_all_languages() -> Dict[str, Dict[str, str]]:
    """Return the complete registry of supported languages.

    Returns:
        The module-level ``SUPPORTED_LANGUAGES`` mapping itself (not a copy),
        keyed by language code.
    """
    registry = SUPPORTED_LANGUAGES
    return registry
|
|
|
|
|
def get_languages_by_region(region: str) -> Dict[str, Dict[str, str]]:
    """Return the supported languages whose region matches ``region``.

    The comparison ignores letter case and any whitespace surrounding
    ``region``.

    Args:
        region: Region name to filter on, e.g. ``"Africa"``.

    Returns:
        A new dict mapping language code to its metadata entry for every
        language in the requested region; empty when nothing matches.
    """
    # Normalise the query once instead of once per registry entry.
    wanted = region.strip().lower()
    return {
        code: info
        for code, info in SUPPORTED_LANGUAGES.items()
        if info["region"].lower() == wanted
    }
|
|
|
|
|
def get_language_info(language_code: str) -> Optional[Dict[str, str]]:
    """Look up the metadata entry for a single language code.

    Args:
        language_code: Exact registry key, e.g. ``"eng_Latn"``.

    Returns:
        The metadata dict stored for ``language_code``, or ``None`` when the
        code is not in the registry.
    """
    if language_code in SUPPORTED_LANGUAGES:
        return SUPPORTED_LANGUAGES[language_code]
    return None
|
|
|
|
|
def is_language_supported(language_code: str) -> bool:
    """Report whether ``language_code`` is a key of the language registry.

    The match is exact: no case folding or trimming is applied.
    """
    is_known = language_code in SUPPORTED_LANGUAGES
    return is_known
|
|
|
|
|
def get_popular_languages() -> Dict[str, Dict[str, str]]:
    """Return the curated subset of commonly used languages.

    Returns:
        A new dict, in the curated order below, mapping each listed code that
        is present in the registry to its metadata entry. Codes missing from
        the registry are skipped.
    """
    # Widely used world languages.
    world_codes = (
        "eng_Latn", "spa_Latn", "fra_Latn", "deu_Latn", "ita_Latn", "por_Latn",
        "rus_Cyrl", "cmn_Hans", "jpn_Jpan", "kor_Hang", "ara_Arab", "hin_Deva",
    )
    # Widely used African languages.
    african_codes = (
        "swh_Latn", "hau_Latn", "yor_Latn", "amh_Ethi", "som_Latn", "kik_Latn",
        "afr_Latn", "ibo_Latn", "orm_Latn", "aka_Latn", "bam_Latn", "fon_Latn",
        "lin_Latn", "lug_Latn", "nya_Latn", "sna_Latn", "tir_Ethi", "wol_Latn",
        "xho_Latn", "zul_Latn", "tsn_Latn", "sot_Latn",
    )

    selected = {}
    for code in world_codes + african_codes:
        if code not in SUPPORTED_LANGUAGES:
            continue
        selected[code] = SUPPORTED_LANGUAGES[code]
    return selected
|
|
|
|
|
def get_african_languages() -> Dict[str, Dict[str, str]]:
    """Return every supported language registered under the "Africa" region.

    Delegates to ``get_languages_by_region``.
    """
    african_entries = get_languages_by_region(region="Africa")
    return african_entries
|
|
|
|
|
def search_languages(query: str) -> Dict[str, Dict[str, str]]:
    """Find languages whose name, native name or code contains ``query``.

    Matching is a case-insensitive substring test. An empty query matches
    every entry, because the empty string is a substring of any string.

    Args:
        query: Text to look for.

    Returns:
        A new dict mapping each matching language code to its metadata entry.
    """
    needle = query.lower()

    def _matches(code, info):
        # An entry matches when any of its searchable fields contains the needle.
        searchable = (info["name"], info["native_name"], code)
        return any(needle in text.lower() for text in searchable)

    return {
        code: info
        for code, info in SUPPORTED_LANGUAGES.items()
        if _matches(code, info)
    }
|
|
|
|
|
def get_language_statistics() -> Dict[str, Any]:
    """Summarise the language registry.

    Returns:
        A dict with four keys:

        * ``"total_languages"`` - number of registry entries (int).
        * ``"regions"`` - number of distinct region values (int).
        * ``"scripts"`` - number of distinct script values (int).
        * ``"by_region"`` - dict mapping each region name to the number of
          languages registered under it.
    """
    entries = SUPPORTED_LANGUAGES.values()
    # One tally yields both the per-region breakdown and the region count.
    region_counts = Counter(info["region"] for info in entries)
    distinct_scripts = {info["script"] for info in entries}

    return {
        "total_languages": len(SUPPORTED_LANGUAGES),
        "regions": len(region_counts),
        "scripts": len(distinct_scripts),
        # Expose a plain dict, as before, rather than the Counter subclass.
        "by_region": dict(region_counts),
    }
|
|