大须大镜 | Mayahilwa Mayakimhemot/ toolbox/ 目度 Systemedo

【IPA生成】拉丁正字注音→IPA注音(琮舌)

tool python

将拉丁正字写法的语句,按照音拍进行切分,并替换为IPA符号的综合程序。

不再使用默认的解释器,改为在 VS Code(Visual Studio Code)中直接运行代码。

将生成的音标粘贴到IPA Reader等网站,即可合成近似的发音。个人推荐使用Zeina(Arabic)作为音源进行拟构(语速请自行调整)。

【IPA生成】拉丁正字注音→IPA注音(琮舌).py

import re

## Conversion tool
# Orthography ("odot") tables and their IPA ("odeh") counterparts.
# The two vowel lists are index-aligned with each other, and so are the two
# consonant lists: entry N of an "odot" list is written as entry N of the
# matching "odeh" list.

vowel_odot = ['a', 'i', 'u', 'e', 'o', 'ä']
consonant_odot = [
    'r', 'y', 'w', 'n', 'jn', 'm', 'h', 'f', 'p', 'b', 'k', 'g', 'c',
    'x', 't', 'd', 's', 'z', 'ch', 'dh', 'sh', 'l', 'cq', 'dq', 'sq', 'q',
]

vowel_odeh = ['a', 'i', 'u', 'e', 'o', 'ɜ']
consonant_odeh = [
    'ʔ', 'j', 'w', 'n', 'ɣ', 'm', 'x', 'f', 'p', 'b', 'k', 'g', 'ts',
    'ps', 't', 'd', 's', 'z', 'tɕ', 'dɕ', 'ɕ', 'ɺ', 'ʈʂ', 'ɖʂ', 'ʂ', 'ɻ',
]

# Every consonant+vowel syllable in orthography, consonant-major order
# (all vowels for the first consonant, then all vowels for the second, ...).
odot = [onset + nucleus for onset in consonant_odot for nucleus in vowel_odot]
print(odot)

# The same syllables in IPA, built in the same order so both lists line up.
odeh = [onset + nucleus for onset in consonant_odeh for nucleus in vowel_odeh]
print(odeh)

# Lookup tables: bare consonants, full syllables, and the union of both
# (syllables are inserted first, bare consonants after them).
consonant_mapping = dict(zip(consonant_odot, consonant_odeh))
syllable_mapping = dict(zip(odot, odeh))
mapping = dict(syllable_mapping)
mapping.update(consonant_mapping)
print(mapping)

# Replace every '-xx' block of a segmented string with its IPA spelling
def process_spell(text):
    """Convert a hyphen-segmented Latin-orthography string to IPA.

    A block is a '-' immediately followed by a key of the module-level
    ``mapping`` table. At each '-' the longest matching key is taken, and
    the hyphen plus key are replaced by the mapped IPA text. Hyphens that
    do not introduce a known key, and every other character, are left as
    they are. Afterwards each apostrophe is turned into a literal '-'.

    The substitution is done in a single left-to-right pass. Converted text
    is therefore never scanned again: with the previous pass-per-length
    approach, a literal hyphen in front of a block (e.g. '-ma--ha') made the
    IPA output of one pass ('-xa') look like Latin input to the next pass.

    Args:
        text: Segmented string, for example the output of ``process_word``.

    Returns:
        The string with all recognised blocks replaced by their IPA form.
    """
    # Longest keys first so that '-cha' wins over '-ch', which wins over '-c'.
    keys = sorted((key for key in mapping if key), key=len, reverse=True)
    if keys:
        block_re = re.compile('-(' + '|'.join(re.escape(key) for key in keys) + ')')
        text = block_re.sub(lambda found: mapping[found.group(1)], text)
    # Hyphens are reserved as block markers, so an apostrophe is used to
    # request a literal hyphen in the final output.
    return text.replace("'", "-")

## Segmentation tool
# Letter inventories consulted by process_word.
# Vowel letters; tested one character at a time.
vowels = 'aiueoä'
# Consonants written with a single letter; tested one character at a time.
single_letter_consonants = 'rywnmhfpbkgcxtdszlq'
# Consonants written with two letters; each pair is emitted as one unit.
multi_letter_consonants = ['jn', 'ch', 'dh', 'sh', 'cq', 'dq', 'sq']
# Two-letter sequences for which process_word drops the first letter and
# emits only the second one as a consonant.
split_letter_consonants = ['jh', 'jq']

# Split a word (or whole sentence) into '-'-prefixed sound blocks
def process_word(word):
    """Segment Latin-orthography text into hyphen-marked blocks.

    The text is lower-cased and scanned left to right:

    * a two-letter consonant becomes '-' plus the pair, or '-r' when the
      same pair follows immediately;
    * a pair listed in ``split_letter_consonants`` becomes '-' plus its
      second letter only;
    * a single-letter consonant becomes '-' plus the letter, or '-r' when it
      is doubled, at least one more character follows the doubled letter,
      and the second copy does not start a two-letter consonant;
    * a vowel is appended bare when the previous character is a
      single-letter consonant, otherwise it is emitted as '-r' plus vowel;
    * any other character is copied unchanged.

    Args:
        word: Text to segment.

    Returns:
        The segmented string.
    """
    word = word.lower()
    length = len(word)
    pieces = []
    pos = 0
    while pos < length:
        pair = word[pos:pos + 2]
        has_pair = len(pair) == 2

        if has_pair and pair in multi_letter_consonants:
            # A repeated two-letter consonant: the first copy becomes '-r'.
            repeated = pos + 3 < length and word[pos + 2:pos + 4] == pair
            pieces.append('-r' if repeated else '-' + pair)
            pos += 2
            continue

        if has_pair and pair in split_letter_consonants:
            # Only the second letter of the pair is kept; no doubling check.
            pieces.append('-' + pair[1])
            pos += 2
            continue

        char = word[pos]
        if char in single_letter_consonants:
            # Doubled single consonant whose second copy is not the start of
            # a two-letter consonant: the first copy becomes '-r'.
            doubled = (
                pos + 2 < length
                and word[pos + 1] == char
                and word[pos + 1:pos + 3] not in multi_letter_consonants
            )
            pieces.append('-r' if doubled else '-' + char)
        elif char in vowels:
            # A vowel attaches to a directly preceding consonant; at the
            # start, after another vowel, or after any other character it
            # opens its own '-r' block.
            follows_consonant = (
                pos > 0
                and word[pos - 1] not in vowels
                and word[pos - 1] in single_letter_consonants
            )
            pieces.append(char if follows_consonant else '-r' + char)
        else:
            # Punctuation, spaces and any other characters pass through.
            pieces.append(char)
        pos += 1
    return ''.join(pieces)


## Entry point
def main():
    """Run one interactive conversion on the console.

    Reads a sentence, then a second answer that switches automatic
    segmentation off when it contains anything but whitespace. Prints the
    (optionally segmented) text followed by its converted form. Any
    exception is reported on the console, after which the program waits
    for Enter.
    """
    try:
        sentence = input("请输入你想要转换的句子:")
        opt_out = input("默认自动切句(输入非空白内容取消切句):")
        if opt_out.strip():
            # The user typed something: take the input as already segmented.
            print("不执行自动切句。")
            text = sentence
        else:
            print("执行自动切句(包括重复辅音前者转促音)。")
            text = process_word(sentence)
        print("切音完成:")
        print(text)
        print("转换完成:")
        print(process_spell(text))
    except Exception as e:
        print(f"发生错误:{e}")
        input("按 Enter 键退出...")


if __name__ == "__main__":
    main()