updated some files

JarodMica · JarodMica · commit 027aa020c1e0 · 2023-07-01T01:36:04.000-07:00
diff --git a/README.md b/README.md
@@ -1,6 +1,51 @@
 # Audio Splitter using Whisperx
+Created for the purpose of curating datasets for training AI models.  It was built with RVC (Retrieval-based Voice Conversion) in mind, but it generally works for any other AI voice model that needs short clips of less than 10 seconds.
 
+## Youtube Video Tutorial
+<insert tutorial later>
 
-## Pytorch
-Even if you don't have a GPU, it should still work.
+## Prerequisites
+- Python 3.10 installation
+- git installation
+- vscode installation (highly recommended)
 
+## Installation and basic usage
+1. Clone the repository (repo)
+```
+git clone https://github.com/JarodMica/audiosplitter_whisper.git
+```
+
+2. Navigate into the repo with:
+```
+cd audiosplitter_whisper
+``` 
+
+4. Run setup-cuda.py if you have a compatible Nvidia graphics card, or run setup-cpu.py if you do not. **NOTE:** This splitter will work on a CPU, albeit very slowly.  The reason I keep this option is for people who may want to curate a dataset locally but train on Colab. (AMD is not compatible, and Mac is not coded for, though it should be able to use MPS.  Both can use the CPU option.)
+
+```
+python setup-cuda.py
+```
+
+5. Activate the virtual environment (venv).
+```
+venv\Scripts\activate
+```
+
+6. If you ran into any permission issues, you'll need to change your Windows Execution Policy to RemoteSigned.  This does lower security on your system slightly, as it allows scripts to be run on your computer; however, only those signed by a Trusted Publisher or verified by you can be run (to my knowledge).  Do this at your own risk.
+    - Open a powershell window as admin.  Then, run the following command:
+
+    ```
+    Set-ExecutionPolicy RemoteSigned
+    ```
+
+    - If you want to change it back, you can with:
+    ```
+    Set-ExecutionPolicy Restricted
+    ```
+
+7. Now rerun step 5 and activate your venv.  After it's activated, you can then run the following command to start up the script:
+```
+python split_audio.py
+```
+
+For more details, please refer to the youtube video.
diff --git a/audio_shortener.py b/audio_shortener.py
@@ -0,0 +1,62 @@
+import os
+
+from scipy.io import wavfile
+from tkinter import filedialog
+from tkinter import *
+
+def run_audiosplitter():
+    """Prompt for a directory and split every ".wav" file found in it.
+
+    Shows a folder-selection dialog parented to the module-level
+    ``root_audiosplitter`` window, then calls ``split_audio_file`` on each
+    entry of the chosen directory whose name ends with ".wav" (the
+    directory is not searched recursively). If the dialog returns no
+    directory, a message is printed instead. The root window is destroyed
+    at the end on both paths.
+    """
+    # Reuse the module-level Tk root (it is not created here) and keep it hidden
+    root = root_audiosplitter
+    root.withdraw()
+
+    # Ask the user to select the audio file directory using the file explorer
+    input_directory = filedialog.askdirectory(title="Select Audio File Directory", parent=root_audiosplitter)
+
+    # Check if a directory was selected
+    if input_directory:
+        # Iterate over the files in the directory.
+        # os.listdir returns its list up front, so segment files written
+        # while this loop runs are not picked up by the same pass.
+        for filename in os.listdir(input_directory):
+            # Suffix match is case-sensitive: names ending in ".WAV" are skipped
+            if filename.endswith(".wav"):
+                file_path = os.path.join(input_directory, filename)
+                split_audio_file(file_path)
+    else:
+        print("No directory selected.")
+
+    # Close the Tkinter root window
+    root.destroy()
+
+def split_audio_file(file_path, segment_duration=10):
+    """Split one WAV file into consecutive segments and delete the original.
+
+    Segments are written next to the source file as
+    ``<base name>_<n>.wav`` with ``n`` starting at 1. Every segment holds
+    ``sample_rate * segment_duration`` samples except possibly the last,
+    which holds whatever remains.
+
+    Args:
+        file_path: Path of the WAV file to split.
+        segment_duration: Maximum segment length in seconds (default 10).
+            NOTE(review): the value is used in slice bounds and ``range``,
+            so it presumably has to be an integer -- confirm before
+            passing a float.
+
+    NOTE(review): the source file is removed unconditionally at the end,
+    including when zero segments were produced (no samples read). An
+    existing file that shares a segment's name is presumably overwritten
+    by ``wavfile.write`` -- confirm against callers' expectations.
+    """
+    # Load the audio file
+    sample_rate, audio_data = wavfile.read(file_path)
+
+    # Calculate the number of segments: whole segments, plus one more for
+    # any leftover samples that do not fill a complete segment
+    num_segments = int(len(audio_data) / (sample_rate * segment_duration))
+    remainder = len(audio_data) % (sample_rate * segment_duration)
+    if remainder > 0:
+        num_segments += 1
+
+    # Segments go into the source file's own directory (nothing is created here)
+    output_dir = os.path.dirname(file_path)
+    base_filename = os.path.splitext(os.path.basename(file_path))[0]
+
+    # Split the audio file into segments
+    for i in range(num_segments):
+        # Sample index range of this segment; the end is clamped so the
+        # final, shorter segment stops at the end of the data
+        start = i * sample_rate * segment_duration
+        end = min((i + 1) * sample_rate * segment_duration, len(audio_data))
+        segment = audio_data[start:end]
+
+        # Create the output file name (1-based segment number)
+        segment_filename = f"{base_filename}_{i+1}.wav"
+        segment_path = os.path.join(output_dir, segment_filename)
+
+        # Save the segment as a new WAV file
+        wavfile.write(segment_path, sample_rate, segment)
+
+        print(f"Segment {i+1}/{num_segments} saved: {segment_filename}")
+
+    # Delete the original file once the loop has finished
+    os.remove(file_path)
+
+# NOTE(review): these statements sit at module level with no
+# `if __name__ == "__main__"` guard visible in this hunk, so the dialog
+# opens whenever the file is executed or imported.
+# Hidden Tk root used as the parent of the directory dialog;
+# run_audiosplitter() calls withdraw() on it again and destroys it when done.
+root_audiosplitter = Tk()
+root_audiosplitter.withdraw()
+
+run_audiosplitter()
diff --git a/requirements-cpu.txt b/requirements-cpu.txt
@@ -5,4 +5,5 @@ torchvision
 torchaudio
 pysrt
 pydub
-pyyaml
+pyyaml
+wheel
diff --git a/requirements-cuda.txt b/requirements-cuda.txt
@@ -6,4 +6,5 @@ torchvision==0.15.1+cu118
 torchaudio==2.0.1
 pysrt
 pydub
-pyyaml
+pyyaml
+wheel
diff --git a/setup-cuda.py b/setup-cuda.py
@@ -6,7 +6,7 @@
 def create_virtual_environment():
     # Create a virtual environment in the "venv" directory
     try:
-        venv.create('venv-cuda', with_pip=True)
+        venv.create('venv', with_pip=True)
     except Exception as e:
         print(f"Failed to create virtual environment. Error: {e}")
         sys.exit(1)
@@ -26,11 +26,9 @@ def install_requirements():
         print(f"Failed to install requirements. Error: {e}")
         sys.exit(1)
 
-
 def main():
     create_virtual_environment()
     install_requirements()
 
-
 if __name__ == '__main__':
     main()

-Original file line number
+Diff line change
 torchaudio
 pysrt
 pydub
 -pyyaml
 +pyyaml
 +wheel