diff --git a/node_modules/@react-native-voice/voice/dist/index.js b/node_modules/@react-native-voice/voice/dist/index.js index a10af13..916dc18 100644 --- a/node_modules/@react-native-voice/voice/dist/index.js +++ b/node_modules/@react-native-voice/voice/dist/index.js @@ -72,7 +72,7 @@ class RCTVoice { }, options), callback); } else { - Voice.startSpeech(locale, callback); + Voice.startSpeech(locale, options, callback); } }); } diff --git a/node_modules/@react-native-voice/voice/ios/Voice/Voice.m b/node_modules/@react-native-voice/voice/ios/Voice/Voice.m index fd9dad8..dbff760 100644 --- a/node_modules/@react-native-voice/voice/ios/Voice/Voice.m +++ b/node_modules/@react-native-voice/voice/ios/Voice/Voice.m @@ -108,7 +108,7 @@ self.audioSession = [AVAudioSession sharedInstance]; } // Set audio session to inactive and notify other sessions - // [self.audioSession setActive:NO withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error: nil]; + //[self.audioSession setActive:NO withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error: nil]; NSString* audioCategory = [self.audioSession category]; // Category hasn't changed -- do nothing if ([self.priorAudioCategory isEqualToString:audioCategory]) return; @@ -118,11 +118,12 @@ } else { [self.audioSession setCategory:self.priorAudioCategory withOptions:AVAudioSessionCategoryOptionDefaultToSpeaker error: nil]; } + // Remove pointer reference self.audioSession = nil; } -- (void) setupAndStartRecognizing:(NSString*)localeStr { +- (void) setupAndStartRecognizing:(NSString*)localeStr :(NSDictionary *)options { self.audioSession = [AVAudioSession sharedInstance]; self.priorAudioCategory = [self.audioSession category]; // Tear down resources before starting speech recognition.. @@ -173,7 +174,6 @@ [self sendEventWithName:@"onSpeechStart" body:nil]; - // A recognition task represents a speech recognition session. // We keep a reference to the task so that it can be cancelled. NSString *taskSessionId = self.sessionId; @@ -216,10 +216,22 @@ }]; - AVAudioFormat* recordingFormat = [inputNode outputFormatForBus:0]; + AVAudioFormat* recordingFormat = [inputNode inputFormatForBus:0]; AVAudioMixerNode *mixer = [[AVAudioMixerNode alloc] init]; [self.audioEngine attachNode:mixer]; + AVAudioFile *audioFile = nil; + if(options[@"audioUri"]){ + NSError* recordError = nil; + audioFile=[[AVAudioFile alloc] + initForWriting:[NSURL fileURLWithPath:options[@"audioUri"]] + settings:recordingFormat.settings + error:&recordError]; + if (recordError != nil) { + [self sendResult:@{@"code": @"record_error", @"message": [recordError localizedDescription], @"domain": [recordError domain], @"audioUri":options[@"audioUri"]} :nil :nil :nil]; + } + } + // Start recording and append recording buffer to speech recognizer @try { [mixer installTapOnBus:0 bufferSize:1024 format:recordingFormat block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) { @@ -253,6 +265,9 @@ // Todo: write recording buffer to file (if user opts in) if (self.recognitionRequest != nil) { [self.recognitionRequest appendAudioPCMBuffer:buffer]; + if(audioFile != nil){ + [audioFile writeFromBuffer:buffer error:nil]; + } } }]; } @catch (NSException *exception) { @@ -368,7 +383,7 @@ RCT_EXPORT_METHOD(isRecognizing:(RCTResponseSenderBlock)callback) { } } -RCT_EXPORT_METHOD(startSpeech:(NSString*)localeStr callback:(RCTResponseSenderBlock)callback) { +RCT_EXPORT_METHOD(startSpeech:(NSString*)localeStr options:(NSDictionary *)options callback:(RCTResponseSenderBlock)callback) { if (self.recognitionTask != nil) { [self sendResult:RCTMakeError(@"Speech recognition already started!", nil, nil) :nil :nil :nil]; return; @@ -386,7 +401,7 @@ RCT_EXPORT_METHOD(startSpeech:(NSString*)localeStr callback:(RCTResponseSenderBl [self sendResult:RCTMakeError(@"Speech recognition restricted on this device", nil, nil) :nil :nil :nil]; break; case SFSpeechRecognizerAuthorizationStatusAuthorized: - [self setupAndStartRecognizing:localeStr]; + [self setupAndStartRecognizing:localeStr :options]; break; } }]; diff --git a/node_modules/@react-native-voice/voice/src/index.ts b/node_modules/@react-native-voice/voice/src/index.ts index 8b51121..97d1f93 100644 --- a/node_modules/@react-native-voice/voice/src/index.ts +++ b/node_modules/@react-native-voice/voice/src/index.ts @@ -96,7 +96,7 @@ class RCTVoice { callback, ); } else { - Voice.startSpeech(locale, callback); + Voice.startSpeech(locale, options, callback); } }); } diff --git a/node_modules/@react-native-voice/voice/android/src/main/java/com/wenkesj/voice/AudioRecorder.java b/node_modules/@react-native-voice/voice/android/src/main/java/com/wenkesj/voice/AudioRecorder.java new file mode 100644 index 0000000..7395874 --- /dev/null +++ b/node_modules/@react-native-voice/voice/android/src/main/java/com/wenkesj/voice/AudioRecorder.java @@ -0,0 +1,125 @@ +package com.wenkesj.voice; + +import android.media.AudioFormat; +import android.media.AudioRecord; +import android.media.MediaRecorder; +import android.util.Log; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +public class AudioRecorder { + private static final int SAMPLE_RATE = 44100; + private static final int CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO; + private static final int AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT; + private static final String TAG = "AudioRecorder"; + + private AudioRecord audioRecord; + private boolean isRecording = false; + private FileOutputStream audioOutput; + + public void startRecording(String wavFile) { + + int bufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT); + audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT, bufferSize); + + try { + audioOutput = new FileOutputStream(wavFile); + isRecording = true; + audioRecord.startRecording(); + + // Start a thread to continuously write audio data to the file + new Thread(() -> { + try { + writeWavHeader(audioOutput, bufferSize); + byte[] buffer = new byte[bufferSize]; + while (isRecording) { + int bytesRead = audioRecord.read(buffer, 0, bufferSize); + if (bytesRead > 0) { + audioOutput.write(buffer, 0, bytesRead); + } + } + } catch (IOException e) { + Log.e(TAG, "Error writing audio data", e); + } + }).start(); + + } catch (IOException e) { + Log.e(TAG, "Error creating audio file", e); + } + } + + public void stopRecording() { + isRecording = false; + if (audioRecord != null) { + audioRecord.stop(); + audioRecord.release(); + audioRecord = null; + } + if (audioOutput != null) { + try { + audioOutput.close(); + } catch (IOException e) { + Log.e(TAG, "Error closing audio file", e); + } + } + } + + private void writeWavHeader(OutputStream outputStream, int bufferSize) throws IOException { + long totalAudioLen = bufferSize * 10000; // 10 seconds, adjust as needed + long totalDataLen = totalAudioLen + 36; + long longSampleRate = SAMPLE_RATE; + int channels = 1; + long byteRate = 16 * SAMPLE_RATE * channels / 8; + + byte[] header = new byte[44]; + header[0] = 'R'; // RIFF/WAVE header + header[1] = 'I'; + header[2] = 'F'; + header[3] = 'F'; + header[4] = (byte) (totalDataLen & 0xff); + header[5] = (byte) ((totalDataLen >> 8) & 0xff); + header[6] = (byte) ((totalDataLen >> 16) & 0xff); + header[7] = (byte) ((totalDataLen >> 24) & 0xff); + header[8] = 'W'; + header[9] = 'A'; + header[10] = 'V'; + header[11] = 'E'; + header[12] = 'f'; // 'fmt ' chunk + header[13] = 'm'; + header[14] = 't'; + header[15] = ' '; + header[16] = 16; // 16 for PCM + header[17] = 0; + header[18] = 0; + header[19] = 0; + header[20] = 1; // PCM format + header[21] = 0; + header[22] = (byte) channels; + header[23] = 0; + header[24] = (byte) (longSampleRate & 0xff); + header[25] = (byte) ((longSampleRate >> 8) & 0xff); + header[26] = (byte) ((longSampleRate >> 16) & 0xff); + header[27] = (byte) ((longSampleRate >> 24) & 0xff); + header[28] = (byte) (byteRate & 0xff); + header[29] = (byte) ((byteRate >> 8) & 0xff); + header[30] = (byte) ((byteRate >> 16) & 0xff); + header[31] = (byte) ((byteRate >> 24) & 0xff); + header[32] = (byte) (16 * channels / 8); // Block align + header[33] = 0; + header[34] = 16; // Bits per sample + header[35] = 0; + header[36] = 'd'; + header[37] = 'a'; + header[38] = 't'; + header[39] = 'a'; + header[40] = (byte) (totalAudioLen & 0xff); + header[41] = (byte) ((totalAudioLen >> 8) & 0xff); + header[42] = (byte) ((totalAudioLen >> 16) & 0xff); + header[43] = (byte) ((totalAudioLen >> 24) & 0xff); + + outputStream.write(header, 0, 44); + } +} diff --git a/node_modules/@react-native-voice/voice/android/src/main/java/com/wenkesj/voice/VoiceModule.java b/node_modules/@react-native-voice/voice/android/src/main/java/com/wenkesj/voice/VoiceModule.java index f22833e..fb048ff 100644 --- a/node_modules/@react-native-voice/voice/android/src/main/java/com/wenkesj/voice/VoiceModule.java +++ b/node_modules/@react-native-voice/voice/android/src/main/java/com/wenkesj/voice/VoiceModule.java @@ -40,6 +40,7 @@ public class VoiceModule extends ReactContextBaseJavaModule implements Recogniti private SpeechRecognizer speech = null; private boolean isRecognizing = false; private String locale = null; + private AudioRecorder audioRecorder; public VoiceModule(ReactApplicationContext reactContext) { super(reactContext); @@ -137,6 +138,10 @@ public class VoiceModule extends ReactContextBaseJavaModule implements Recogniti startListening(opts); isRecognizing = true; callback.invoke(false); + if(opts.hasKey("audioUri")){ + audioRecorder = new AudioRecorder(); + audioRecorder.startRecording(opts.getString("audioUri")); + } } catch (Exception e) { callback.invoke(e.getMessage()); } @@ -180,6 +185,10 @@ public class VoiceModule extends ReactContextBaseJavaModule implements Recogniti @Override public void run() { try { + if(audioRecorder != null){ + audioRecorder.stopRecording(); + audioRecorder=null; + } if (speech != null) { speech.stopListening(); }