From 27d5ed0661b760559bb61a8d77a37cfc3e32ad58 Mon Sep 17 00:00:00 2001 From: turning point <40035031+colstone@users.noreply.github.com> Date: Thu, 2 May 2024 01:43:01 +0800 Subject: [PATCH 1/4] Add JSON File of Openvpi's Version --- tools/nsf_hifigan/config_v1_openvpi_ver.json | 61 ++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tools/nsf_hifigan/config_v1_openvpi_ver.json diff --git a/tools/nsf_hifigan/config_v1_openvpi_ver.json b/tools/nsf_hifigan/config_v1_openvpi_ver.json new file mode 100644 index 00000000..6bfdb545 --- /dev/null +++ b/tools/nsf_hifigan/config_v1_openvpi_ver.json @@ -0,0 +1,61 @@ +{ + "resblock": "1", + "learning_rate": 0.0002, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.999, + "upsample_rates": [ + 8, + 8, + 2, + 2, + 2 + ], + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4, + 4 + ], + "upsample_initial_channel": 512, + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "discriminator_periods": [ + 3, + 5, + 7, + 11, + 17, + 23, + 37 + ], + "segment_size": 16384, + "num_mels": 128, + "n_fft": 2048, + "hop_size": 512, + "win_size": 2048, + "sampling_rate": 44100, + "fmin": 40, + "fmax": 16000 +} From e5c5844b0c57c6acc702ad3404991ed08ee19854 Mon Sep 17 00:00:00 2001 From: turning point <40035031+colstone@users.noreply.github.com> Date: Thu, 2 May 2024 02:06:42 +0800 Subject: [PATCH 2/4] Rename config_v1_openvpi_ver.json to config_v1_openvpi.json --- .../{config_v1_openvpi_ver.json => config_v1_openvpi.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools/nsf_hifigan/{config_v1_openvpi_ver.json => config_v1_openvpi.json} (100%) diff --git a/tools/nsf_hifigan/config_v1_openvpi_ver.json b/tools/nsf_hifigan/config_v1_openvpi.json similarity index 100% rename from tools/nsf_hifigan/config_v1_openvpi_ver.json rename to tools/nsf_hifigan/config_v1_openvpi.json From 6eb6f897b4d1afb5c14d8d6af4ab37b7194cb143 Mon Sep 17 00:00:00 2001 From: turning point <40035031+colstone@users.noreply.github.com> Date: Sat, 29 Jun 2024 20:41:41 +0800 Subject: [PATCH 3/4] Add multichannel audio convert part in test_nsf_hifigan.py --- tests/test_nsf_hifigan.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_nsf_hifigan.py b/tests/test_nsf_hifigan.py index f1baaa2a..43356a73 100644 --- a/tests/test_nsf_hifigan.py +++ b/tests/test_nsf_hifigan.py @@ -1,5 +1,6 @@ import soundfile as sf import torchaudio +import torch from fish_diffusion.modules.pitch_extractors import ParselMouthPitchExtractor from fish_diffusion.modules.vocoders import NsfHifiGAN @@ -10,6 +11,10 @@ audio, sr = torchaudio.load(source) +#Change the multichannel audio to single channel +if audio.shape[0] > 1: + audio = torch.mean(audio, dim=0, keepdim=True) + mel = gan.wav2spec(audio) f0 = ParselMouthPitchExtractor(f0_min=40.0, f0_max=2000.0, keep_zeros=False)( audio, sr, pad_to=mel.shape[-1] From 612288983d2a5a81931f80171616e5d5b8b4c624 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 29 Jun 2024 12:43:42 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_nsf_hifigan.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_nsf_hifigan.py b/tests/test_nsf_hifigan.py index 43356a73..81df2444 100644 --- a/tests/test_nsf_hifigan.py +++ b/tests/test_nsf_hifigan.py @@ -1,6 +1,6 @@ import soundfile as sf -import torchaudio import torch +import torchaudio from fish_diffusion.modules.pitch_extractors import ParselMouthPitchExtractor from fish_diffusion.modules.vocoders import NsfHifiGAN @@ -11,7 +11,7 @@ audio, sr = torchaudio.load(source) -#Change the multichannel audio to single channel +# Change the multichannel audio to single channel if audio.shape[0] > 1: audio = torch.mean(audio, dim=0, keepdim=True)