Skip to content

Commit dcaf8c9

Browse files
committed Jan 25, 2024
Almost properly scale on onnx export
·
0.3.59 0.3.54
1 parent 66a2e24 commit dcaf8c9

File tree

3 files changed

+5
-2
lines changed

3 files changed

+5
-2
lines changed
 

‎setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="vosk-tts",
8-
version="0.3.53",
8+
version="0.3.54",
99
author="Alpha Cephei Inc",
1010
author_email="contact@alphacephei.com",
1111
description="Offline text to speech synthesis",

‎training/stft.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,9 @@ def inverse(self, magnitude, phase):
253253
stride=self.hop_length,
254254
padding=0)
255255

256+
# scale by hop ratio
257+
inverse_transform *= float(self.filter_length) / self.hop_length
258+
256259
inverse_transform = inverse_transform[:, :, int(self.filter_length/2):]
257260
inverse_transform = inverse_transform[:, :, :-int(self.filter_length/2):]
258261

‎vosk_tts/synth.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def audio_float_to_int16(self,
1515
audio: np.ndarray, max_wav_value: float = 32767.0
1616
) -> np.ndarray:
1717
"""Normalize audio and convert to int16 range"""
18-
audio_norm = audio * max_wav_value * 3.0
18+
audio_norm = audio * max_wav_value
1919
audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
2020
audio_norm = audio_norm.astype("int16")
2121
return audio_norm

0 commit comments

Comments
 (0)
Please sign in to comment.