From 3e8cbfa80debdbec2a12a1d5fbe34519732c828a Mon Sep 17 00:00:00 2001 From: Anton Karmanov Date: Thu, 5 Feb 2026 14:34:00 +0300 Subject: [PATCH 1/2] Remove PreProcess unused imports The unused import of ast.Str raises ImportError on Python 3.14, where ast.Str has been removed. This change fixes importing the module on Python 3.14. --- aksharamukha/PreProcess.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/aksharamukha/PreProcess.py b/aksharamukha/PreProcess.py index 581697b..b30f21b 100644 --- a/aksharamukha/PreProcess.py +++ b/aksharamukha/PreProcess.py @@ -1,14 +1,12 @@ # -*- coding: utf-8 -*- -from ast import Str -from asyncio import constants from . import GeneralMap as GM import re import string import unicodedata from . import PostProcess from . import ConvertFix as CF -from aksharamukha.ScriptMap.EastIndic import PhagsPa, Burmese, Khmer +from aksharamukha.ScriptMap.EastIndic import Burmese, Khmer from aksharamukha.ScriptMap.MainIndic import Tamil, Malayalam, Limbu, Chakma ### Use escape char in all functions @@ -37,6 +35,7 @@ def HindiMarathiRomanLoCFix(Strng): return Strng +# FIXME: Redefined later def RomanLoCLaUnderscoreDoubleDot(Strng): Strng = Strng.replace('ḻ', 'l̤') From 5b09a0b71761e1590b43fcf014af0131f10bfdbb Mon Sep 17 00:00:00 2001 From: Anton Karmanov Date: Thu, 5 Feb 2026 14:45:03 +0300 Subject: [PATCH 2/2] Fix SyntaxWarning on escape sequences --- aksharamukha/PreProcess.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/aksharamukha/PreProcess.py b/aksharamukha/PreProcess.py index b30f21b..d6b0d24 100644 --- a/aksharamukha/PreProcess.py +++ b/aksharamukha/PreProcess.py @@ -455,12 +455,12 @@ def BengaliSwitchYaYYa(Strng): def removeFinalSchwaArab(Strng): #print('here', Strng) diacrtics = ["\u0652", "\u064E", "\u0650", "\u064F"] - Strng = re.sub('([\u0628-\u0647])(?![\u0652\u064E\u0650\u064F\u0651\u064B\u064C\u064D\u0649])(?=(\W|$))', r'\1' + '\u0652', Strng) - Strng = 
re.sub('([\u0628-\u0647]\u0651)(?![\u0652\u064E\u0650\u064F\u064B\u064C\u064D\u0649])(?=(\W|$))', r'\1' + '\u0652', Strng) - Strng = re.sub('(? i^ @@ -1083,7 +1083,7 @@ def RemoveFinal(Strng, Target): ra = Khmer.ConsonantMap[26] Strng = re.sub('('+ListC+')'+'\u17CC',ra+'\u17D2'+r'\1',Strng) - Strng = re.sub(ISyl + '('+ListC +')' + '(((\u17D2)' + '('+ListC +'))*)([៍៎៏]?)(?=[\s\n])', r'\1\8\9' + vir + r'\13', Strng) # kama -> kam + Strng = re.sub(ISyl + '('+ListC +')' + '(((\u17D2)' + '('+ListC +'))*)([៍៎៏]?)(?=[\\s\n])', r'\1\8\9' + vir + r'\13', Strng) # kama -> kam Strng = re.sub(ISyl + '('+ListC +')' + '(((\u17D2)' + '('+ListC +'))*)([៍៎៏]?)$', r'\1\8\9' + vir + r'\13', Strng) # kama -> kam else: Strng = re.sub(ISyl + Cons2+"(?!" + Char + ")", r'\1\8' + vir, Strng) # kama -> kam @@ -1295,15 +1295,15 @@ def PreProcess(Strng,Source,Target,postoptions,preoptions): sOm = 'OM' tOm = 'oM' - punc = '(' + '|'.join(["\u005C"+x for x in list(string.punctuation)]+ ['\s'] - + [x.replace('.', '\.') for x in GM.CrunchSymbols(GM.Signs,Source)[1:3]]) + ')' + punc = '(' + '|'.join(["\u005C"+x for x in list(string.punctuation)]+ [r'\s'] + + [x.replace('.', r'\.') for x in GM.CrunchSymbols(GM.Signs,Source)[1:3]]) + ')' Strng = re.sub(punc + sOm + punc, r'\1' + tOm + r'\2', Strng) Strng = re.sub('^' + sOm + punc, tOm + r'\1', Strng) Strng = re.sub(punc + sOm + '$', r'\1' + tOm, Strng) Strng = re.sub('^' + sOm + '$', tOm, Strng) - punc = '(\s)' + punc = r'(\s)' Strng = re.sub(punc + sOm + punc, r'\1' + tOm + r'\2', Strng) Strng = re.sub('^' + sOm + punc, tOm + r'\1', Strng) @@ -1332,7 +1332,7 @@ def PreProcess(Strng,Source,Target,postoptions,preoptions): if Source == 'BarahaNorth' or Source == 'BarahaSouth': # alternate representations - alt_baraha = [('A', 'aa'), ('I', 'ee'), ('U', 'oo'), ('~loo', '~lU'), ('Roo', 'RU'), ('ou', 'au'), ('K', 'kh'), ('G','gh'), ('ch', 'c'), ('Ch', 'C'), ('J','jh'), ('P', 'ph'), ('B', 'bh'), ('w', 'v'), ('sh', 'S'), ('~h', '_h'), ('Y', 'yx'), ('^^', 
'{}'), ('^', '()'), ('tx', 'rx'), ('zh', 'Lx'), ('~e', '~a'), ('q', '\_'), ('#', "\\'"), ('$', '\\"')] + alt_baraha = [('A', 'aa'), ('I', 'ee'), ('U', 'oo'), ('~loo', '~lU'), ('Roo', 'RU'), ('ou', 'au'), ('K', 'kh'), ('G','gh'), ('ch', 'c'), ('Ch', 'C'), ('J','jh'), ('P', 'ph'), ('B', 'bh'), ('w', 'v'), ('sh', 'S'), ('~h', '_h'), ('Y', 'yx'), ('^^', '{}'), ('^', '()'), ('tx', 'rx'), ('zh', 'Lx'), ('~e', '~a'), ('q', r'\_'), ('#', "\\'"), ('$', '\\"')] for alt, norm in alt_baraha: Strng = Strng.replace(alt, norm) @@ -1682,7 +1682,7 @@ def PhagsPaArrange(Strng,Source): yrv = "|".join([GM.CrunchSymbols(GM.Consonants, Source)[i] for i in [25,26,28]]) Strng = re.sub("("+ListC+")"+"("+vir+")"+"("+yrv+")"+"("+"("+ListVS+")?"+"("+ListCS+")?"+")",r' \1\2\3\4',Strng) - Strng = re.sub("("+ListC+ListV+")"+"("+"("+ListVS+")?"+"("+ListCS+")?"+")"+"("+ListC+")"+"("+vir+")"+"(?!\s)",r"\1\2\5\6 ",Strng) + Strng = re.sub("("+ListC+ListV+")"+"("+"("+ListVS+")?"+"("+ListCS+")?"+")"+"("+ListC+")"+"("+vir+")"+r"(?!\s)",r"\1\2\5\6 ",Strng) Strng = re.sub("("+ListC+ListV+")"+"("+"("+ListVS+")?"+"("+ListCS+")?"+")"+"("+ListC+")"+'(?!'+vir+')',r"\1\2 \5",Strng) Strng = re.sub("("+ListC+ListV+")"+"("+"("+ListVS+")?"+"("+ListCS+")?"+")"+"("+ListC+")"+'(?!'+vir+')',r"\1\2 \5",Strng) @@ -1740,7 +1740,7 @@ def TamilTranscribeCommon(Strng, c = 31): import string - punct = "|".join(['\\'+x for x in list(string.punctuation.replace(".","").replace("?",""))])+"|\s" + punct = "|".join(['\\'+x for x in list(string.punctuation.replace(".","").replace("?",""))])+r"|\s" # CA + Spac | Punct + SA -> CCA Strng = re.sub('('+ListC[5]+vir+')'+'(('+punct+')+)'+'('+ListC[c]+')',r'\1\2'+ListC[5],Strng) @@ -1767,7 +1767,7 @@ def TamilTranscribeCommon(Strng, c = 31): Strng = re.sub(ListSC[2]+vir+ListSC[2],ListC[10]+vir+ListC[26],Strng) # RR | TT + /s + SA -> RR + /s + CA - Strng = re.sub("("+'['+ListC[10]+ListSC[2]+']'+vir+')'+'(\s)'+'('+ListC[c]+')',r'\1\2'+ListC[5],Strng) + Strng = 
re.sub("("+'['+ListC[10]+ListSC[2]+']'+vir+')'+r'(\s)'+'('+ListC[c]+')',r'\1\2'+ListC[5],Strng) ## NNN to N, RR to R