diff --git a/CHANGES.md b/CHANGES.md index b706a3f..ecb9112 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # hear version history +## v0.7 - 08/11/2025 + +* Added support for specifying audio input device via `-n` flag +* Added support for listing available audio input devices via `-a` flag + ## v0.6 - 29/05/2025 * Added timestamped output mode @@ -9,15 +14,15 @@ ## v0.5 - 05/11/2023 -* Now supports setting a timeout for speech recognition via the -t flag +* Now supports setting a timeout for speech recognition via the `-t` flag ## v0.4 - 21/04/2023 -* Now supports -p flag to enable adding punctuation to speech recognition results (macOS 13+ only) +* Now supports `-p` flag to enable adding punctuation to speech recognition results (macOS 13+ only) ## v0.3 - 25/03/2023 -* Now supports -x flag to specify "exit word", i.e. a word that causes the program to quit when heard +* Now supports `-x` flag to specify "exit word", i.e. a word that causes the program to quit when heard ## v0.2 - 27/10/2022 diff --git a/Info.plist b/Info.plist index a83be9f..c222f7e 100644 --- a/Info.plist +++ b/Info.plist @@ -2,19 +2,21 @@ - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleShortVersionString - $(MARKETING_VERSION) - CFBundleSupportedPlatforms - - MacOSX - - LSMinimumSystemVersion - $(MACOSX_DEPLOYMENT_TARGET) - NSMicrophoneUsageDescription - This command line tool converts spoken audio to text. - NSSpeechRecognitionUsageDescription - This command line tool converts spoken audio to text. + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleShortVersionString + $(MARKETING_VERSION) + CFBundleSupportedPlatforms + + MacOSX + + LSMinimumSystemVersion + $(MACOSX_DEPLOYMENT_TARGET) + NSMicrophoneUsageDescription + This command line tool converts spoken audio to text. + NSCameraUseContinuityCameraDeviceType + + NSSpeechRecognitionUsageDescription + This command line tool converts spoken audio to text. 
diff --git a/Makefile b/Makefile index b65fe8c..e035338 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ XCODE_PROJ := "hear.xcodeproj" PROGRAM_NAME := "hear" BUILD_DIR := "products" -VERSION := "0.6" +VERSION := "0.7" all: clean build_unsigned diff --git a/README.md b/README.md index 3b12e4e..5100086 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ See the [man page](https://sveinbjorn.org/files/manpages/hear.1.html) for furthe [available](https://github.com/sveinbjornt/hear) under a [BSD license](#bsd-license). **If you find this program useful, please [make a donation](https://sveinbjorn.org/donations).** -* **[⬇ Download hear 0.6](https://sveinbjorn.org/files/software/hear.zip)** +* **[⬇ Download hear 0.7](https://sveinbjorn.org/files/software/hear.zip)** (~50 KB, ARM/Intel 64-bit, macOS 13 or later, Developer ID signed and notarized by Apple) ## Installation diff --git a/hear.1 b/hear.1 index 142d462..9e9f464 100644 --- a/hear.1 +++ b/hear.1 @@ -1,4 +1,4 @@ -.Dd May 29, 2025 +.Dd Nov 8, 2025 .Dt HEAR 1 .Os Darwin .Sh NAME @@ -48,6 +48,10 @@ Set exit word. This causes the program to exit when a speech recognition result ends with the specified word. .It Fl t -timeout Ar seconds Exit if no recognition results are received within the specified number of seconds. +.It Fl a -audio-input-devices +List available audio input devices and exit. +.It Fl n -input-device-id +Specify ID of audio input device. .It Fl h -help Print help and exit. .It Fl v -version diff --git a/hear.1.html b/hear.1.html index a1908e1..ea134eb 100644 --- a/hear.1.html +++ b/hear.1.html @@ -60,6 +60,12 @@ Exit if no recognition results are received within the specified number of seconds. + -a --audio-input-devices + List available audio input devices and exit. + + -n --input-device-id + Specify ID of audio input device. + -h --help Print help and exit. 
@@ -76,6 +82,6 @@ Sveinbjorn Thordarson <sveinbjorn@sveinbjorn.org> -Darwin May 29, 2025 Darwin +Darwin November 8, 2025 Darwin \ No newline at end of file diff --git a/src/Common.h b/src/Common.h index f6aa208..95de9d0 100644 --- a/src/Common.h +++ b/src/Common.h @@ -33,7 +33,7 @@ #import #define PROGRAM_NAME @"hear" -#define PROGRAM_VERSION @"0.6" +#define PROGRAM_VERSION @"0.7" #define PROGRAM_AUTHOR @"Sveinbjorn Thordarson" #define PROGRAM_AUTHOR_EMAIL @"sveinbjorn@sveinbjorn.org" diff --git a/src/Hear.h b/src/Hear.h index d3c4bbc..dca193f 100644 --- a/src/Hear.h +++ b/src/Hear.h @@ -44,8 +44,13 @@ addTimestamps:(BOOL)addTimestamps subtitleMode:(BOOL)subtitle exitWord:(NSString *)exitWord - timeout:(CGFloat)timeout; + timeout:(CGFloat)timeout + inputDeviceID:(NSString *)inputDeviceID; + (void)printSupportedLocales; ++ (NSArray *)availableAudioInputDevices; ++ (BOOL)hasAvailableAudioInputDevice; ++ (BOOL)isAvailableAudioInputDevice:(NSString *)deviceID; ++ (void)printAvailableAudioInputDevices; @end diff --git a/src/Hear.m b/src/Hear.m index a02a863..7755547 100644 --- a/src/Hear.m +++ b/src/Hear.m @@ -32,6 +32,7 @@ #import "Hear.h" #import "Common.h" +#import @interface Hear() @@ -53,6 +54,7 @@ @interface Hear() @property (nonatomic) BOOL subtitleMode; @property (nonatomic, retain) NSString *exitWord; @property (nonatomic) CGFloat timeout; +@property (nonatomic, retain) NSString *inputDeviceID; @end @@ -66,7 +68,9 @@ - (instancetype)initWithLocale:(NSString *)loc addTimestamps:(BOOL)timestamps subtitleMode:(BOOL)subtitle exitWord:(NSString *)exitWord - timeout:(CGFloat)timeout { + timeout:(CGFloat)timeout + inputDeviceID:(NSString *)inputDeviceID +{ if ((self = [super init])) { if ([[Hear supportedLocales] containsObject:loc] == NO) { @@ -83,6 +87,7 @@ - (instancetype)initWithLocale:(NSString *)loc self.subtitleMode = subtitle; self.exitWord = exitWord; self.timeout = timeout; + self.inputDeviceID = inputDeviceID; } return self; } @@ -144,7 +149,7 @@ - 
(void)initRecognizer { // Make sure recognition is available if (self.recognizer.isAvailable == NO) { - [self die:@"Speech recognizer not available. Try enabling Siri in System Preferences/Settings."]; + [self die:@"Speech recognizer not available. Try enabling Siri in System Settings."]; } if (self.useOnDeviceRecognition && !self.recognizer.supportsOnDeviceRecognition) { @@ -301,6 +306,54 @@ - (void)processFileSubtitle { - (void)startListening { [self initRecognizer]; + // Set the input device, if specified + if (self.inputDeviceID) { + AudioObjectPropertyAddress addr = { + kAudioHardwarePropertyDefaultInputDevice, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMain + }; + + AudioDeviceID deviceID = kAudioObjectUnknown; + + NSArray *devices = [Hear availableAudioInputDevices]; + for (NSDictionary *device in devices) { + if ([device[@"id"] isEqualToString:self.inputDeviceID]) { + + CFStringRef deviceUID = (__bridge CFStringRef)device[@"id"]; + + AudioValueTranslation value; + value.mInputData = &deviceUID; + value.mInputDataSize = sizeof(CFStringRef); + value.mOutputData = &deviceID; + value.mOutputDataSize = sizeof(AudioDeviceID); + + UInt32 size = sizeof(AudioValueTranslation); + + AudioObjectPropertyAddress addr = { + kAudioHardwarePropertyDeviceForUID, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMain + }; + + OSStatus status = AudioObjectGetPropertyData(kAudioObjectSystemObject, &addr, 0, NULL, &size, &value); + if (status != noErr) { + [self die:@"Unable to get device ID for UID '%@'", self.inputDeviceID]; + } + break; + } + } + + if (deviceID == kAudioObjectUnknown) { + [self die:@"Audio input device with ID '%@' not found", self.inputDeviceID]; + } + + OSStatus status = AudioObjectSetPropertyData(kAudioObjectSystemObject, &addr, 0, NULL, sizeof(AudioDeviceID), &deviceID); + if (status != noErr) { + [self die:@"Error setting audio input device: %d", status]; + } + } + // Create speech recognition request self.request = 
[[SFSpeechAudioBufferRecognitionRequest alloc] init]; if (self.request == nil) { @@ -403,7 +456,7 @@ - (void)timedOut:(id)sender { exit(EXIT_SUCCESS); } -#pragma mark - Class methods +#pragma mark - Locales + (NSArray *)supportedLocales { NSMutableArray *localeIdentifiers = [NSMutableArray new]; @@ -418,6 +471,125 @@ + (void)printSupportedLocales { NSPrint([[Hear supportedLocales] componentsJoinedByString:@"\n"]); } +#pragma mark - Audio Input Devices + ++ (NSArray *)availableAudioInputDevices { + AudioObjectPropertyAddress addr = { + kAudioHardwarePropertyDevices, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMain + }; + + UInt32 size; + OSStatus status = AudioObjectGetPropertyDataSize(kAudioObjectSystemObject, &addr, 0, NULL, &size); + if (status != noErr) { + return @[]; + } + + int count = size / sizeof(AudioDeviceID); + AudioDeviceID *deviceIDs = (AudioDeviceID *)malloc(size); + if (deviceIDs == NULL) { + return @[]; + } + + status = AudioObjectGetPropertyData(kAudioObjectSystemObject, &addr, 0, NULL, &size, deviceIDs); + if (status != noErr) { + free(deviceIDs); + return @[]; + } + + NSMutableArray *devices = [NSMutableArray array]; + + for (int i = 0; i < count; i++) { + AudioDeviceID deviceID = deviceIDs[i]; + + addr.mScope = kAudioDevicePropertyScopeInput; + addr.mSelector = kAudioDevicePropertyStreamConfiguration; + status = AudioObjectGetPropertyDataSize(deviceID, &addr, 0, NULL, &size); + if (status != noErr) { + continue; + } + + AudioBufferList *bufferList = (AudioBufferList *)malloc(size); + status = AudioObjectGetPropertyData(deviceID, &addr, 0, NULL, &size, bufferList); + if (status != noErr) { + free(bufferList); + continue; + } + + UInt32 channelCount = 0; + for (int j = 0; j < bufferList->mNumberBuffers; j++) { + channelCount += bufferList->mBuffers[j].mNumberChannels; + } + free(bufferList); + + if (channelCount == 0) { + continue; + } + + CFStringRef deviceName; + size = sizeof(deviceName); + addr.mSelector = 
kAudioDevicePropertyDeviceNameCFString; + status = AudioObjectGetPropertyData(deviceID, &addr, 0, NULL, &size, &deviceName); + if (status != noErr) { + continue; + } + + CFStringRef deviceUID; + size = sizeof(deviceUID); + addr.mSelector = kAudioDevicePropertyDeviceUID; + status = AudioObjectGetPropertyData(deviceID, &addr, 0, NULL, &size, &deviceUID); + if (status != noErr) { + CFRelease(deviceName); + continue; + } + + [devices addObject:@{ + @"name": (__bridge NSString *)deviceName, + @"id": (__bridge NSString *)deviceUID + }]; + + CFRelease(deviceName); + CFRelease(deviceUID); + } + + free(deviceIDs); + + return devices; +} + ++ (BOOL)hasAvailableAudioInputDevice { + return [[Hear availableAudioInputDevices] count] != 0; +} + ++ (BOOL)isAvailableAudioInputDevice:(NSString *)deviceID { + NSArray *devices = [Hear availableAudioInputDevices]; + for (NSDictionary *device in devices) { + if ([device[@"id"] isEqualToString:deviceID]) { + return YES; + } + } + return NO; +} + ++ (void)printAvailableAudioInputDevices { + NSArray *devices = [Hear availableAudioInputDevices]; + + if ([devices count] == 0) { + NSPrint(@"No audio input devices available"); + return; + } + + NSPrint(@"Available Audio Input Devices:"); + NSUInteger num = 0; + for (NSDictionary *device in devices) { + num += 1; + NSPrint(@"%lu. 
%@ (ID: %@)", num, device[@"name"], device[@"id"]); + } +} + +#pragma mark - Util + + (BOOL)isFileSupportedByAVFoundation:(NSString *)filePath { // Create NSURL from file path NSURL *fileURL = [NSURL fileURLWithPath:filePath]; diff --git a/src/main.m b/src/main.m index b2a32ce..0bce0e8 100644 --- a/src/main.m +++ b/src/main.m @@ -42,7 +42,7 @@ static inline void PrintHelp(void); // Command line options -static const char optstring[] = "sl:i:dpmx:t:TShv"; +static const char optstring[] = "sl:i:dpmx:t:an:TShv"; static struct option long_options[] = { // List supported locales for speech to text @@ -65,6 +65,10 @@ {"exit-word", required_argument, 0, 'x'}, // Timeout (in seconds) {"timeout", required_argument, 0, 't'}, + // List available audio input devices + {"audio-input-devices", no_argument, 0, 'a'}, + // Specify ID of audio input device + {"input-device-id", required_argument, 0, 'n'}, // Print help {"help", no_argument, 0, 'h'}, // Print version @@ -80,10 +84,11 @@ int main(int argc, const char * argv[]) { @autoreleasepool { NSPrintErr(@"This program requires macOS Catalina 10.15 or later."); exit(EXIT_FAILURE); } - + NSString *locale = DEFAULT_LOCALE; NSString *inputFilename; NSString *exitWord; + NSString *inputDeviceID = nil; BOOL useOnDeviceRecognition = NO; BOOL singleLineMode = NO; BOOL addsPunctuation = NO; @@ -128,7 +133,7 @@ int main(int argc, const char * argv[]) { @autoreleasepool { case 'p': addsPunctuation = YES; break; - + // Whether to add timestamps to speech recognition results // This option is ignored on macOS versions prior to Ventura case 'T': @@ -150,6 +155,19 @@ int main(int argc, const char * argv[]) { @autoreleasepool { timeout = [@(optarg) floatValue]; break; + case 'a': + [Hear printAvailableAudioInputDevices]; + exit(EXIT_SUCCESS); + break; + + case 'n': + inputDeviceID = @(optarg); + if ([Hear isAvailableAudioInputDevice:inputDeviceID] == NO) { + NSPrintErr(@"The device '%@' is not a valid audio input device.", inputDeviceID); + 
exit(EXIT_FAILURE); + } + break; + // Print version case 'v': PrintVersion(); @@ -165,6 +183,11 @@ int main(int argc, const char * argv[]) { @autoreleasepool { } } + if (inputFilename == nil && [Hear hasAvailableAudioInputDevice] == FALSE) { + NSPrintErr(@"No available audio input device."); + exit(EXIT_FAILURE); + } + // Instantiate app delegate object with core program functionality Hear *hear = [[Hear alloc] initWithLocale:locale input:inputFilename @@ -174,7 +197,8 @@ int main(int argc, const char * argv[]) { @autoreleasepool { addTimestamps:addsTimestamps subtitleMode:subtitleMode exitWord:exitWord - timeout:timeout]; + timeout:timeout + inputDeviceID:inputDeviceID]; [[NSApplication sharedApplication] setDelegate:hear]; [NSApp run]; @@ -206,17 +230,19 @@ static inline void PrintHelp(void) { \n\ Options:\n\ \n\ - -s --supported Print list of supported locales\n\ + -s --supported Print list of supported locales\n\ \n\ - -l --locale Specify speech recognition locale\n\ - -i --input [file_path] Specify audio file to process\n\ - -d --device Only use on-device speech recognition\n\ - -m --mode Enable single-line output mode (mic only)\n\ - -p --punctuation Add punctuation to speech recognition results (macOS 13+)\n\ - -x --exit-word Set exit word that causes program to quit\n\ - -t --timeout Set silence timeout (in seconds)\n\ - -T --timestamps Write timestamps as transcription occurs (file input only)\n\ - -S --subtitle Enable subtitle mode, producing .srt output (file input only)\n\ + -l --locale Specify speech recognition locale\n\ + -i --input [file_path] Specify audio file to process\n\ + -d --device Only use on-device speech recognition\n\ + -m --mode Enable single-line output mode (mic only)\n\ + -p --punctuation Add punctuation to speech recognition results (macOS 13+)\n\ + -x --exit-word Set exit word that causes program to quit\n\ + -t --timeout Set silence timeout (in seconds)\n\ + -T --timestamps Write timestamps as transcription occurs (file input 
only)\n\ + -S --subtitle Enable subtitle mode, producing .srt output (file input only)\n\ + -a --audio-input-devices List available audio input devices\n\ + -n --input-device-id Specify ID of audio input device\n\ \n\ -h --help Prints help\n\ -v --version Prints program name and version\n\