diff --git a/.gitignore b/.gitignore index 802d916..12b32b3 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,47 @@ libopenjtalk.dll *.pyc *.jtlog *.wav -venv37 \ No newline at end of file +venv37 +htsengineapi/AUTHORS +htsengineapi/COPYING +htsengineapi/ChangeLog +htsengineapi/INSTALL +htsengineapi/Makefile.am +htsengineapi/Makefile.in +htsengineapi/Makefile.mak +htsengineapi/NEWS +htsengineapi/README +htsengineapi/aclocal.m4 +htsengineapi/bin/ +htsengineapi/config/ +htsengineapi/configure +htsengineapi/configure.ac +htsengineapi/include/ +htsengineapi/lib/ +libopenjtalk/AUTHORS +libopenjtalk/COPYING +libopenjtalk/ChangeLog +libopenjtalk/INSTALL +libopenjtalk/Makefile.am +libopenjtalk/Makefile.in +libopenjtalk/Makefile.mak +libopenjtalk/NEWS +libopenjtalk/README +libopenjtalk/aclocal.m4 +libopenjtalk/bin/ +libopenjtalk/config/ +libopenjtalk/configure +libopenjtalk/configure.ac +libopenjtalk/jpcommon/ +libopenjtalk/mecab-naist-jdic/ +libopenjtalk/mecab/ +libopenjtalk/mecab2njd/ +libopenjtalk/njd/ +libopenjtalk/njd2jpcommon/ +libopenjtalk/njd_set_accent_phrase/ +libopenjtalk/njd_set_accent_type/ +libopenjtalk/njd_set_digit/ +libopenjtalk/njd_set_long_vowel/ +libopenjtalk/njd_set_pronunciation/ +libopenjtalk/njd_set_unvoiced_vowel/ +libopenjtalk/text2mecab/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 0e966f7..0000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "libopenjtalk"] - path = libopenjtalk - url = https://github.com/nishimotz/libopenjtalk.git -[submodule "htsengineapi"] - path = htsengineapi - url = https://github.com/nishimotz/htsengineapi.git diff --git a/htsengineapi b/htsengineapi deleted file mode 160000 index a040a0d..0000000 --- a/htsengineapi +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a040a0dd6bea7c91eb7906280d3a27629aebf1de diff --git a/htsengineapi/.gitkeep b/htsengineapi/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/jtalkCore.py b/jtalkCore.py index 68cb496..666a966 100644 --- a/jtalkCore.py +++ b/jtalkCore.py @@ -2,8 +2,6 @@ # -*- coding: utf-8 -*- # Copyright (C) 2013-2019 Takuya Nishimoto -from __future__ import absolute_import - try: from .mecab import * except (ImportError, ValueError): diff --git a/jtalkRunner.py b/jtalkRunner.py index 7b7c187..ef3435a 100644 --- a/jtalkRunner.py +++ b/jtalkRunner.py @@ -8,31 +8,41 @@ # requires pyaudio (PortAudio wrapper) # http://people.csail.mit.edu/hubert/pyaudio/ -from __future__ import unicode_literals, print_function import os import sys -import wave import time -import cProfile -import pstats -from jtalkCore import * -import jtalkPrepare +import wave +from os import getcwd -JT_DIR = r"..\nvdajp\source\synthDrivers\jtalk" -JT_LIB_DIR = r"." -JT_DLL = os.path.join(JT_LIB_DIR, "libopenjtalk.dll") +try: + import pyaudio +except: + pyaudio = None # type: ignore +# import cProfile +# import pstats +jtalk_dir = JT_DIR = os.path.normpath( + os.path.join(getcwd(), "..", "source", "synthDrivers", "jtalk") +) +sys.path.append(JT_DIR) +import jtalkPrepare # type: ignore +from jtalkCore import * # type: ignore + +JT_DLL = os.path.join(JT_DIR, "libopenjtalk.dll") voices = [ { "id": "V1", - "name": "m1", + "name": "m001", "lang": "ja", "samp_rate": 48000, "fperiod": 240, "lf0_base": 5.0, + "pitch_bias": 0, "speaker_attenuation": 1.0, - "htsvoice": os.path.join(JT_DIR, "m001", "m001.htsvoice"), - # "espeak_variant": "max", + "htsvoice": os.path.join(jtalk_dir, "m001", "m001.htsvoice"), + "alpha": 0.55, + "beta": 0.00, + "espeak_variant": "max", }, { "id": "V2", @@ -40,11 +50,14 @@ "lang": "ja", "samp_rate": 48000, "fperiod": 240, - "lf0_base": 5.86, - "pitch_bias": -10, - "speaker_attenuation": 0.5, - "htsvoice": os.path.join(JT_DIR, "mei", "mei_normal.htsvoice"), - # "espeak_variant": "f1", + "lf0_base": 5.9, + "pitch_bias": -25, + "inflection_bias": -10, + "speaker_attenuation": 0.8, + "htsvoice": os.path.join(jtalk_dir, "mei", "mei_happy.htsvoice"), + "alpha": 0.60, # 0.55, + "beta": 0.00, + "espeak_variant": "f1", }, { "id": "V3", @@ -55,15 +68,32 @@ "lf0_base": 5.0, "pitch_bias": 0, "speaker_attenuation": 1.0, - "htsvoice": os.path.join(JT_DIR, "lite", "voice.htsvoice"), - # "espeak_variant": "max", + "htsvoice": os.path.join(jtalk_dir, "lite", "voice.htsvoice"), + "alpha": 0.42, + "beta": 0.00, + "espeak_variant": "max", + }, + { + "id": "V4", + "name": "tohoku-f01", + "lang": "ja", + "samp_rate": 48000, + "fperiod": 240, + "lf0_base": 5.9, + "pitch_bias": 0, + "inflection_bias": 0, + "speaker_attenuation": 0.8, + "htsvoice": os.path.join(jtalk_dir, "tohokuf01", "tohoku-f01-neutral.htsvoice"), + "alpha": 0.54, + "beta": 0.00, + "espeak_variant": "f1", }, ] def pa_play(data, samp_rate=16000): - import pyaudio - + if pyaudio is None: + return p = pyaudio.PyAudio() stream = p.open( format=p.get_format_from_width(2), channels=1, rate=samp_rate, output=True @@ -80,8 +110,12 @@ def pa_play(data, samp_rate=16000): p.terminate() +do_print = False + + def __print(s): - print(s.encode("cp932", "ignore")) + if do_print: + print(s.encode("cp932", "ignore")) def print_code(msg): @@ -91,18 +125,13 @@ def print_code(msg): print(s) +count = 0 + + def do_synthesis( - msg, - voice_args, - do_play, - do_write, - do_write_jt, - do_log, - fperiod, - pitch=50, - inflection=50, - vol=50, + msg, voice_args, do_play, do_write, do_log, fperiod, pitch=50, inflection=50, vol=50 ): + global count msg = jtalkPrepare.convert(msg) s = text2mecab(msg) __print("utf-8: (%s)" % s.decode("utf-8", "ignore")) @@ -110,20 +139,19 @@ def do_synthesis( Mecab_analysis(s, mf) Mecab_print(mf, __print) Mecab_correctFeatures(mf) - ar = Mecab_splitFeatures(mf) + ar = [mf] # ar = Mecab_splitFeatures(mf) __print("array size %d" % len(ar)) max_level = int(326.67 * int(vol) + 100) # 100..32767 level = int(max_level * voice_args["speaker_attenuation"]) lf0_amp = 0.020 * inflection # 50 = original range ls = 0.015 * (pitch - 50.0 + voice_args["pitch_bias"]) # 50 = no shift lf0_offset = ls + voice_args["lf0_base"] * (1 - lf0_amp) - count = 0 for a in ar: count += 1 __print("feature size %d" % a.size) Mecab_print(a, __print) Mecab_utf8_to_cp932(a) - if do_write_jt: + if do_write: w = "_test%d.jt.wav" % count else: w = None @@ -167,24 +195,35 @@ def do_synthesis( del mf -def main( - do_play=False, do_write=True, do_write_jt=False, do_log=False, voice_id=1, s="" -): +def main(do_play=False, do_write=True, do_log=False): njd = NJD() jpcommon = JPCommon() engine = HTS_Engine() libjt_initialize(JT_DLL) - v = voices[voice_id] + v = voices[3] libjt_load(v["htsvoice"]) - Mecab_initialize(__print, JT_DIR) + libjt_set_alpha(v["alpha"]) + libjt_set_beta(v["beta"]) + print("alpha:%f beta:%f" % (libjt_get_alpha(), libjt_get_beta())) + # print('GV-weight 0-0:%f' % (libjt_get_gv_interpolation_weight(0, 0),)) + # libjt_set_beta(0.40) + # libjt_set_gv_interpolation_weight(0, 0, 2) + # libjt_set_gv_interpolation_weight(0, 1, 2) + Mecab_initialize(__print, JT_DIR, os.path.join(JT_DIR, "dic")) + + msgs = [ + "welcome to nvda", + "テンキーのinsertキーとメインのinsertキーの両方がnvdaキーとして動作します。", + ] fperiod = v["fperiod"] - do_synthesis( - s, v, do_play, do_write, do_write_jt, do_log, fperiod, pitch=50, inflection=50 - ) + for s in msgs: + do_synthesis(s, v, do_play, do_write, do_log, fperiod, pitch=50, inflection=50) + return 0 if __name__ == "__main__": - main(do_play=False, do_write=True, do_log=True, voice_id=1, s="100.25ドル") + do_print = True + main(do_play=False, do_write=True) # prof = cProfile.run("main(do_play=True)", '_cprof.prof') # p = pstats.Stats('_cprof.prof') # p.strip_dirs() diff --git a/libopenjtalk b/libopenjtalk deleted file mode 160000 index 273cb38..0000000 --- a/libopenjtalk +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 273cb381ff0841a534f775b8f503b8a320960783 diff --git a/libopenjtalk/.gitkeep b/libopenjtalk/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/mecab.py b/mecab.py index 9c02e5a..120f3ef 100644 --- a/mecab.py +++ b/mecab.py @@ -1,8 +1,6 @@ # coding: UTF-8 # mecab.py for python-jtalk -from __future__ import absolute_import - CODE = "utf-8" import os @@ -15,9 +13,9 @@ from .roma2kana import getKanaFromRoma from .text2mecab import text2mecab except (ImportError, ValueError): - from _nvdajp_spellchar import convert as convertSpellChar - from roma2kana import getKanaFromRoma - from text2mecab import text2mecab + from _nvdajp_spellchar import convert as convertSpellChar # type: ignore + from roma2kana import getKanaFromRoma # type: ignore + from text2mecab import text2mecab # type: ignore c_double_p = POINTER(c_double) c_double_p_p = POINTER(c_double_p) diff --git a/readme.txt b/readme.txt index df939a9..e787225 100644 --- a/readme.txt +++ b/readme.txt @@ -1,13 +1,13 @@ A part of NonVisual Desktop Access (NVDA) This file is covered by the GNU General Public License. See the file COPYING for more details. -Copyright (C) 2015-2016 Takuya Nishimoto +Copyright (C) 2015-2023 Takuya Nishimoto setup: -Visual Studio 2015 (Ver.14.0 for Windows Desktop) +Visual Studio 2022 -Python 2.7.11 (win32) +Python 3.11 (win32) > git clone https://github.com/nvdajp/python-jtalk > cd python-jtalk diff --git a/text2mecab.py b/text2mecab.py index 301d833..ca07e0a 100644 --- a/text2mecab.py +++ b/text2mecab.py @@ -1,10 +1,8 @@ # coding: UTF-8 # text2mecab.py for python-jtalk -from __future__ import absolute_import - -import unicodedata import re +import unicodedata CODE = "utf-8" @@ -112,6 +110,7 @@ def text2mecab_setup(): [re.compile("\\|"), "|"], [re.compile("}"), "}"], [re.compile("~"), "〜"], + [re.compile("�"), "?"], # u+fffd ] diff --git a/vcsetup.cmd b/vcsetup.cmd index cee78a1..f346ded 100644 --- a/vcsetup.cmd +++ b/vcsetup.cmd @@ -2,20 +2,29 @@ cl if "%ERRORLEVEL%" neq "9009" goto :done -if exist "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\vcvars32.bat" goto vc2015x64 -if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars32.bat" goto vc2017x64 +if exist "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars32.bat" goto vc2022x64 if exist "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars32.bat" goto vc2019x64 -call "C:\Program Files\Microsoft Visual Studio 14.0\VC\bin\vcvars32.bat" -SET CL=/arch:IA32 /D "_USING_V110_SDK71_" +if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars32.bat" goto vc2017x64 +if exist "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\vcvars32.bat" goto vc2015x64 + +:vc2022x64 +call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars32.bat" +SET CL=/arch:IA32 goto done -:vc2015x64 -call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\vcvars32.bat" -SET CL=/arch:IA32 /D "_USING_V110_SDK71_" + +:vc2019x64 +call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars32.bat" +SET CL=/arch:IA32 goto done + :vc2017x64 call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars32.bat" SET CL=/arch:IA32 -:vc2019x64 -call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars32.bat" -SET CL=/arch:IA32 -:done +goto done + +:vc2015x64 +call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\vcvars32.bat" +SET CL=/arch:IA32 /D "_USING_V110_SDK71_" +goto done + +:done \ No newline at end of file