From 27d5ed0661b760559bb61a8d77a37cfc3e32ad58 Mon Sep 17 00:00:00 2001
From: turning point <40035031+colstone@users.noreply.github.com>
Date: Thu, 2 May 2024 01:43:01 +0800
Subject: [PATCH 1/4] Add JSON config file for the OpenVPI version

---
 tools/nsf_hifigan/config_v1_openvpi_ver.json | 61 ++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 tools/nsf_hifigan/config_v1_openvpi_ver.json

diff --git a/tools/nsf_hifigan/config_v1_openvpi_ver.json b/tools/nsf_hifigan/config_v1_openvpi_ver.json
new file mode 100644
index 00000000..6bfdb545
--- /dev/null
+++ b/tools/nsf_hifigan/config_v1_openvpi_ver.json
@@ -0,0 +1,61 @@
+{
+    "resblock": "1",
+    "learning_rate": 0.0002,
+    "adam_b1": 0.8,
+    "adam_b2": 0.99,
+    "lr_decay": 0.999,
+    "upsample_rates": [
+        8,
+        8,
+        2,
+        2,
+        2
+    ],
+    "upsample_kernel_sizes": [
+        16,
+        16,
+        4,
+        4,
+        4
+    ],
+    "upsample_initial_channel": 512,
+    "resblock_kernel_sizes": [
+        3,
+        7,
+        11
+    ],
+    "resblock_dilation_sizes": [
+        [
+            1,
+            3,
+            5
+        ],
+        [
+            1,
+            3,
+            5
+        ],
+        [
+            1,
+            3,
+            5
+        ]
+    ],
+    "discriminator_periods": [
+        3,
+        5,
+        7,
+        11,
+        17,
+        23,
+        37
+    ],
+    "segment_size": 16384,
+    "num_mels": 128,
+    "n_fft": 2048,
+    "hop_size": 512,
+    "win_size": 2048,
+    "sampling_rate": 44100,
+    "fmin": 40,
+    "fmax": 16000
+}

From e5c5844b0c57c6acc702ad3404991ed08ee19854 Mon Sep 17 00:00:00 2001
From: turning point <40035031+colstone@users.noreply.github.com>
Date: Thu, 2 May 2024 02:06:42 +0800
Subject: [PATCH 2/4] Rename config_v1_openvpi_ver.json to
 config_v1_openvpi.json

---
 .../{config_v1_openvpi_ver.json => config_v1_openvpi.json}        | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tools/nsf_hifigan/{config_v1_openvpi_ver.json => config_v1_openvpi.json} (100%)

diff --git a/tools/nsf_hifigan/config_v1_openvpi_ver.json b/tools/nsf_hifigan/config_v1_openvpi.json
similarity index 100%
rename from tools/nsf_hifigan/config_v1_openvpi_ver.json
rename to tools/nsf_hifigan/config_v1_openvpi.json

From 6eb6f897b4d1afb5c14d8d6af4ab37b7194cb143 Mon Sep 17 00:00:00 2001
From: turning point <40035031+colstone@users.noreply.github.com>
Date: Sat, 29 Jun 2024 20:41:41 +0800
Subject: [PATCH 3/4] Add multichannel-to-mono audio conversion in
 test_nsf_hifigan.py

---
 tests/test_nsf_hifigan.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/test_nsf_hifigan.py b/tests/test_nsf_hifigan.py
index f1baaa2a..43356a73 100644
--- a/tests/test_nsf_hifigan.py
+++ b/tests/test_nsf_hifigan.py
@@ -1,5 +1,6 @@
 import soundfile as sf
 import torchaudio
+import torch
 
 from fish_diffusion.modules.pitch_extractors import ParselMouthPitchExtractor
 from fish_diffusion.modules.vocoders import NsfHifiGAN
@@ -10,6 +11,10 @@
 
 audio, sr = torchaudio.load(source)
 
+#Change the multichannel audio to single channel
+if audio.shape[0] > 1:
+    audio = torch.mean(audio, dim=0, keepdim=True)
+
 mel = gan.wav2spec(audio)
 f0 = ParselMouthPitchExtractor(f0_min=40.0, f0_max=2000.0, keep_zeros=False)(
     audio, sr, pad_to=mel.shape[-1]

From 612288983d2a5a81931f80171616e5d5b8b4c624 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 29 Jun 2024 12:43:42 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/test_nsf_hifigan.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_nsf_hifigan.py b/tests/test_nsf_hifigan.py
index 43356a73..81df2444 100644
--- a/tests/test_nsf_hifigan.py
+++ b/tests/test_nsf_hifigan.py
@@ -1,6 +1,6 @@
 import soundfile as sf
-import torchaudio
 import torch
+import torchaudio
 
 from fish_diffusion.modules.pitch_extractors import ParselMouthPitchExtractor
 from fish_diffusion.modules.vocoders import NsfHifiGAN
@@ -11,7 +11,7 @@
 
 audio, sr = torchaudio.load(source)
 
-#Change the multichannel audio to single channel
+# Downmix multichannel audio to a single channel by averaging over dim 0
 if audio.shape[0] > 1:
     audio = torch.mean(audio, dim=0, keepdim=True)