# Ming-Lite-Omni / sentence_manager /default_config.yaml
# LandyGuo
# update 20250516 version
# 81a8221
text_norm:
  # Ordered list of replacement groups. Every group is a mapping from a
  # pattern to its replacement text; the keys use regular-expression syntax
  # (escaped metacharacters such as "\\?", quantifiers, look-arounds).
  # Presumably the consumer applies the groups in list order, top to bottom —
  # confirm against the code that loads this file.
  #
  # NOTE(review): several pairs below map a character to itself (for example
  # "!" -> "!" in the EN2CN group, or ",+" -> ","). Given the EN2CN / CN2CN
  # labels, one side of each pair was presumably a full-width (Chinese)
  # punctuation mark that was lost in transcoding. Keys and values are kept
  # exactly as found; restore them from upstream before relying on this file.
  postprocess:
    # EN2CN
    - "…": "。"
      "!": "!"
      "\\?": "?"
      ";": ";"
      ":": ":"
      ",": ","
      "\\(": "("
      "\\)": ")"
    # EN2CN
    - "……": "。"
    # OTHER2CN
    - "﹐": ","
      "﹔": ";"
      "。": "。"
      # CN2CN
      ";": "。"
      ":": ","
      "、": ","
    # Collapse a run of consecutive full stops "。" into one.
    - "。+": "。"
    # A "/" still present after the regex pass is replaced with "每".
    - "/": "每"
    # Replace underscores with a space.
    - "_": " "
    # Handle runs of tildes left after the regex pass. The original note says
    # they become "。" or "至" depending on whether they end a sentence; only
    # the "。" replacement is present in this group.
    # NOTE(review): the key "~+" was listed twice in this mapping. YAML
    # forbids duplicate keys and most parsers silently keep the last one, so a
    # single entry is kept here (same result under last-wins). The class
    # "[~~]" also repeats one character. Presumably one of each pair was a
    # full-width tilde variant lost in transcoding — confirm against upstream.
    - "~+": "~"
      "[~~]": "。"
    # Replace "-" and "'" with "," unless they sit between English letters:
    # the first rule matches a run preceded by a non-letter, the second a run
    # followed by a non-letter.
    - "(?<=[^a-zA-Z])[-']+": ","
      "[-']+(?=[^a-zA-Z])": ","
    # Delete every character outside the allowed set: the listed punctuation,
    # CJK ideographs U+4E00–U+9FA4 (skipping U+4E28 and U+4E3F–U+4E41),
    # ASCII letters and the space.
    # NOTE(review): the original note also names "-", "'" and digits as kept,
    # but none of them appears in the class as written, so they are removed —
    # confirm whether that is intended.
    - "[^。!?,\u4e00-\u4E27\u4E29-\u4E3E\u4E42-\u9fa4a-zA-Z ]": ""
    # Collapse a run of consecutive commas into one. (The original note
    # quoted "。" here, apparently copied from the full-stop rule.)
    - ",+": ","
    # Collapse a run of consecutive spaces into one.
    - " +": " "
# Tokens listed as split points — presumably the delimiters at which the
# consumer cuts text into pieces; confirm against the code that reads this key.
# NOTE(review): nesting indentation in this file was flattened, so it is not
# visible here whether these two keys belong at the top level or under
# another section. They are kept at the column where they were found.
split_token:
  - "。"
  - ","
# Explicitly null; how a non-null value would be interpreted is defined by
# the consumer and is not visible in this file.
split_cn_length: null