6
6
import javax .sound .sampled .TargetDataLine ;
7
7
import java .util .concurrent .*;
8
8
import java .util .ArrayList ;
9
+ import javax .sound .sampled .AudioFormat ;
10
+ import javax .sound .sampled .AudioInputStream ;
11
+ import javax .sound .sampled .AudioSystem ;
9
12
10
13
public class VoiceClientWithMic {
11
14
@@ -28,18 +31,35 @@ public ArrayList<String> asr(TargetDataLine line) throws Exception {
28
31
ws .addListener (new CustomWebSocketAdapter ());
29
32
ws .connect ();
30
33
31
- byte [] buf = new byte [ 8000 ];
32
- while ( true ) {
33
- line . read ( buf , 0 , buf . length );
34
- if ( results . size () > 0 && results . get ( results . size () - 1 ). contains ( "exit" )) {
35
- disconnect ( ws );
36
- break ;
34
+ try {
35
+ AudioInputStream audioInputStream = new AudioInputStream ( line );
36
+ AudioFormat format = audioInputStream . getFormat ( );
37
+ int bytesPerFrame = format . getFrameSize ();
38
+ if ( bytesPerFrame == AudioSystem . NOT_SPECIFIED ) {
39
+ bytesPerFrame = 1 ;
37
40
}
38
- recieveLatch = new CountDownLatch (1 );
39
- ws .sendBinary (buf );
40
- recieveLatch .await ();
41
- }
42
41
42
+ // Let the Vosk server know the sample rate of the audio stream
43
+ ws .sendText ("{ \" config\" : { \" sample_rate\" : " + (int )format .getSampleRate () + " } }" );
44
+
45
+ // Set an arbitrary buffer size of 1024 frames.
46
+ int numBytes = 1024 * bytesPerFrame ;
47
+ byte [] audioBytes = new byte [numBytes ];
48
+ try {
49
+ int numBytesRead = 0 ;
50
+ // Read up to numBytes bytes at a time from the audio input stream.
51
+ while ((numBytesRead = audioInputStream .read (audioBytes )) != -1 ) {
52
+ recieveLatch = new CountDownLatch (1 );
53
+ ws .sendBinary (audioBytes );
54
+ recieveLatch .await ();
55
+ }
56
+ disconnect (ws );
57
+ } catch (Exception e ) {
58
+ e .printStackTrace ();
59
+ }
60
+ } catch (Exception ex ) {
61
+ ex .printStackTrace ();
62
+ }
43
63
return results ;
44
64
}
45
65
@@ -59,9 +79,9 @@ private void disconnect(WebSocket ws) {
59
79
private TargetDataLine getLine () {
60
80
TargetDataLine line ;
61
81
62
- //It must be a 16 kHz (or 8 kHz, depending on the training data), 16bit Mono (= single channel) Little -Endian file
82
+ //It must be 16 kHz (or 8 kHz, depending on the training data), 16-bit mono (= single channel), Little-Endian (bigEndian = false)
63
83
AudioFormat .Encoding encoding = AudioFormat .Encoding .PCM_SIGNED ;
64
- float rate = 8000 .0f ;
84
+ float rate = 16000 .0f ;
65
85
int channels = 1 ;
66
86
int sampleSize = 16 ;
67
87
boolean bigEndian = false ;
0 commit comments