Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# hear version history

## v0.7 - 08/11/2025

* Added support for specifying audio input device via `-n` flag
* Added support for listing available audio input devices via `-a` flag

## v0.6 - 29/05/2025

* Added timestamped output mode
Expand All @@ -9,15 +14,15 @@

## v0.5 - 05/11/2023

* Now supports setting a timeout for speech recognition via the -t flag
* Now supports setting a timeout for speech recognition via the `-t` flag

## v0.4 - 21/04/2023

* Now supports -p flag to enable adding punctuation to speech recognition results (macOS 13+ only)
* Now supports `-p` flag to enable adding punctuation to speech recognition results (macOS 13+ only)

## v0.3 - 25/03/2023

* Now supports -x flag to specify "exit word", i.e. a word that causes the program to quit when heard
* Now supports `-x` flag to specify "exit word", i.e. a word that causes the program to quit when heard

## v0.2 - 27/10/2022

Expand Down
30 changes: 16 additions & 14 deletions Info.plist
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,21 @@
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleShortVersionString</key>
<string>$(MARKETING_VERSION)</string>
<key>CFBundleSupportedPlatforms</key>
<array>
<string>MacOSX</string>
</array>
<key>LSMinimumSystemVersion</key>
<string>$(MACOSX_DEPLOYMENT_TARGET)</string>
<key>NSMicrophoneUsageDescription</key>
<string>This command line tool converts spoken audio to text.</string>
<key>NSSpeechRecognitionUsageDescription</key>
<string>This command line tool converts spoken audio to text.</string>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleShortVersionString</key>
<string>$(MARKETING_VERSION)</string>
<key>CFBundleSupportedPlatforms</key>
<array>
<string>MacOSX</string>
</array>
<key>LSMinimumSystemVersion</key>
<string>$(MACOSX_DEPLOYMENT_TARGET)</string>
<key>NSMicrophoneUsageDescription</key>
<string>This command line tool converts spoken audio to text.</string>
<key>NSCameraUseContinuityCameraDeviceType</key>
<true/>
<key>NSSpeechRecognitionUsageDescription</key>
<string>This command line tool converts spoken audio to text.</string>
</dict>
</plist>
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
XCODE_PROJ := "hear.xcodeproj"
PROGRAM_NAME := "hear"
BUILD_DIR := "products"
VERSION := "0.6"
VERSION := "0.7"

all: clean build_unsigned

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ See the [man page](https://sveinbjorn.org/files/manpages/hear.1.html) for furthe
[available](https://github.com/sveinbjornt/hear) under a [BSD license](#bsd-license).
**If you find this program useful, please [make a donation](https://sveinbjorn.org/donations).**

* **[⬇ Download hear 0.6](https://sveinbjorn.org/files/software/hear.zip)**
* **[⬇ Download hear 0.7](https://sveinbjorn.org/files/software/hear.zip)**
(~50 KB, ARM/Intel 64-bit, macOS 13 or later, Developer ID signed and notarized by Apple)

## Installation
Expand Down
6 changes: 5 additions & 1 deletion hear.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.Dd May 29, 2025
.Dd November 8, 2025
.Dt HEAR 1
.Os Darwin
.Sh NAME
Expand Down Expand Up @@ -48,6 +48,10 @@ Set exit word. This causes the program to exit when a speech recognition result
ends with the specified word.
.It Fl t -timeout Ar seconds
Exit if no recognition results are received within the specified number of seconds.
.It Fl a -audio-input-devices
List available audio input devices and exit.
.It Fl n -input-device-id
Specify ID of audio input device.
.It Fl h -help
Print help and exit.
.It Fl v -version
Expand Down
8 changes: 7 additions & 1 deletion hear.1.html
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@
Exit if no recognition results are received within the specified
number of seconds.

<b>-a</b> <b>--audio-input-devices</b>
List available audio input devices and exit.

<b>-n</b> <b>--input-device-id</b>
Specify ID of audio input device.

<b>-h</b> <b>--help</b>
Print help and exit.

Expand All @@ -76,6 +82,6 @@

Sveinbjorn Thordarson &lt;sveinbjorn@sveinbjorn.org&gt;

Darwin May 29, 2025 Darwin
Darwin November 8, 2025 Darwin
</font></pre>
</body></html>
2 changes: 1 addition & 1 deletion src/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#import <Foundation/Foundation.h>

#define PROGRAM_NAME @"hear"
#define PROGRAM_VERSION @"0.6"
#define PROGRAM_VERSION @"0.7"
#define PROGRAM_AUTHOR @"Sveinbjorn Thordarson"
#define PROGRAM_AUTHOR_EMAIL @"sveinbjorn@sveinbjorn.org"

Expand Down
7 changes: 6 additions & 1 deletion src/Hear.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,13 @@
addTimestamps:(BOOL)addTimestamps
subtitleMode:(BOOL)subtitle
exitWord:(NSString *)exitWord
timeout:(CGFloat)timeout;
timeout:(CGFloat)timeout
inputDeviceID:(NSString *)inputDeviceID;

+ (void)printSupportedLocales;
/// Returns the audio devices that expose at least one input channel.
/// Each element is an NSDictionary with the keys @"name" (device name)
/// and @"id" (device UID). Returns an empty array if the device list
/// cannot be read.
+ (NSArray *)availableAudioInputDevices;
/// Returns YES if +availableAudioInputDevices reports at least one device.
+ (BOOL)hasAvailableAudioInputDevice;
/// Returns YES if deviceID equals the @"id" (UID) of one of the devices
/// reported by +availableAudioInputDevices.
+ (BOOL)isAvailableAudioInputDevice:(NSString *)deviceID;
/// Prints a numbered list of the available audio input devices (name and ID),
/// or a notice when there are none.
+ (void)printAvailableAudioInputDevices;

@end
178 changes: 175 additions & 3 deletions src/Hear.m
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#import "Hear.h"
#import "Common.h"
#import <CoreAudio/CoreAudio.h>

@interface Hear()

Expand All @@ -53,6 +54,7 @@ @interface Hear()
@property (nonatomic) BOOL subtitleMode;
@property (nonatomic, retain) NSString *exitWord;
@property (nonatomic) CGFloat timeout;
@property (nonatomic, retain) NSString *inputDeviceID;

@end

Expand All @@ -66,7 +68,9 @@ - (instancetype)initWithLocale:(NSString *)loc
addTimestamps:(BOOL)timestamps
subtitleMode:(BOOL)subtitle
exitWord:(NSString *)exitWord
timeout:(CGFloat)timeout {
timeout:(CGFloat)timeout
inputDeviceID:(NSString *)inputDeviceID
{
if ((self = [super init])) {

if ([[Hear supportedLocales] containsObject:loc] == NO) {
Expand All @@ -83,6 +87,7 @@ - (instancetype)initWithLocale:(NSString *)loc
self.subtitleMode = subtitle;
self.exitWord = exitWord;
self.timeout = timeout;
self.inputDeviceID = inputDeviceID;
}
return self;
}
Expand Down Expand Up @@ -144,7 +149,7 @@ - (void)initRecognizer {

// Make sure recognition is available
if (self.recognizer.isAvailable == NO) {
[self die:@"Speech recognizer not available. Try enabling Siri in System Preferences/Settings."];
[self die:@"Speech recognizer not available. Try enabling Siri in System Settings."];
}

if (self.useOnDeviceRecognition && !self.recognizer.supportsOnDeviceRecognition) {
Expand Down Expand Up @@ -301,6 +306,54 @@ - (void)processFileSubtitle {
- (void)startListening {
[self initRecognizer];

// Set the input device, if specified. Note that this writes the
// default-input-device property on the Core Audio system object.
if (self.inputDeviceID) {
    AudioDeviceID deviceID = kAudioObjectUnknown;

    // Only UIDs reported by +availableAudioInputDevices are accepted
    for (NSDictionary *device in [Hear availableAudioInputDevices]) {
        if (![device[@"id"] isEqualToString:self.inputDeviceID]) {
            continue;
        }

        // Translate the device UID string into an AudioDeviceID
        CFStringRef deviceUID = (__bridge CFStringRef)device[@"id"];

        AudioValueTranslation translation;
        translation.mInputData = &deviceUID;
        translation.mInputDataSize = sizeof(CFStringRef);
        translation.mOutputData = &deviceID;
        translation.mOutputDataSize = sizeof(AudioDeviceID);

        UInt32 translationSize = sizeof(AudioValueTranslation);

        // Distinct name: the original re-declared `addr` here, shadowing the
        // address used for the default-input-device write further down.
        AudioObjectPropertyAddress uidLookupAddr = {
            kAudioHardwarePropertyDeviceForUID,
            kAudioObjectPropertyScopeGlobal,
            kAudioObjectPropertyElementMain
        };

        OSStatus lookupStatus = AudioObjectGetPropertyData(kAudioObjectSystemObject, &uidLookupAddr, 0, NULL, &translationSize, &translation);
        if (lookupStatus != noErr) {
            [self die:@"Unable to get device ID for UID '%@'", self.inputDeviceID];
        }
        break;
    }

    // Either no listed device matched, or the translation yielded no device
    if (deviceID == kAudioObjectUnknown) {
        [self die:@"Audio input device with ID '%@' not found", self.inputDeviceID];
    }

    AudioObjectPropertyAddress defaultInputAddr = {
        kAudioHardwarePropertyDefaultInputDevice,
        kAudioObjectPropertyScopeGlobal,
        kAudioObjectPropertyElementMain
    };

    OSStatus status = AudioObjectSetPropertyData(kAudioObjectSystemObject, &defaultInputAddr, 0, NULL, sizeof(AudioDeviceID), &deviceID);
    if (status != noErr) {
        [self die:@"Error setting audio input device: %d", (int)status];
    }
}

// Create speech recognition request
self.request = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
if (self.request == nil) {
Expand Down Expand Up @@ -403,7 +456,7 @@ - (void)timedOut:(id)sender {
exit(EXIT_SUCCESS);
}

#pragma mark - Class methods
#pragma mark - Locales

+ (NSArray<NSString *> *)supportedLocales {
NSMutableArray *localeIdentifiers = [NSMutableArray new];
Expand All @@ -418,6 +471,125 @@ + (void)printSupportedLocales {
NSPrint([[Hear supportedLocales] componentsJoinedByString:@"\n"]);
}

#pragma mark - Audio Input Devices

/// Returns the audio devices that expose at least one input channel.
///
/// Each element is an NSDictionary with two entries: @"name" (the device
/// name) and @"id" (the device UID). An empty array is returned when the
/// system device list cannot be read; individual devices whose properties
/// cannot be queried are skipped.
+ (NSArray *)availableAudioInputDevices {
    AudioObjectPropertyAddress addr = {
        kAudioHardwarePropertyDevices,
        kAudioObjectPropertyScopeGlobal,
        kAudioObjectPropertyElementMain
    };

    UInt32 size = 0;
    OSStatus status = AudioObjectGetPropertyDataSize(kAudioObjectSystemObject, &addr, 0, NULL, &size);
    if (status != noErr || size == 0) {
        return @[];
    }

    AudioDeviceID *deviceIDs = (AudioDeviceID *)malloc(size);
    if (deviceIDs == NULL) {
        return @[];
    }

    status = AudioObjectGetPropertyData(kAudioObjectSystemObject, &addr, 0, NULL, &size, deviceIDs);
    if (status != noErr) {
        free(deviceIDs);
        return @[];
    }

    // Derive the count from the size reported by the fetch itself, so that
    // only entries that were actually written are read.
    UInt32 count = (UInt32)(size / sizeof(AudioDeviceID));

    NSMutableArray *devices = [NSMutableArray array];

    for (UInt32 i = 0; i < count; i++) {
        AudioDeviceID deviceID = deviceIDs[i];

        // Ask for the input-scope stream configuration to count input channels
        addr.mScope = kAudioDevicePropertyScopeInput;
        addr.mSelector = kAudioDevicePropertyStreamConfiguration;
        size = 0;
        status = AudioObjectGetPropertyDataSize(deviceID, &addr, 0, NULL, &size);
        if (status != noErr || size == 0) {
            continue;
        }

        // Zero-filled so mNumberBuffers is well defined even on a short write
        AudioBufferList *bufferList = (AudioBufferList *)calloc(1, size);
        if (bufferList == NULL) {
            continue;
        }
        status = AudioObjectGetPropertyData(deviceID, &addr, 0, NULL, &size, bufferList);
        if (status != noErr) {
            free(bufferList);
            continue;
        }

        UInt32 channelCount = 0;
        for (UInt32 j = 0; j < bufferList->mNumberBuffers; j++) {
            channelCount += bufferList->mBuffers[j].mNumberChannels;
        }
        free(bufferList);

        // No input channels: not an input device
        if (channelCount == 0) {
            continue;
        }

        // The returned CFStrings are owned by us; CFBridgingRelease hands
        // them to ARC, so no manual CFRelease is needed on any path.
        CFStringRef nameRef = NULL;
        size = sizeof(nameRef);
        addr.mSelector = kAudioDevicePropertyDeviceNameCFString;
        status = AudioObjectGetPropertyData(deviceID, &addr, 0, NULL, &size, &nameRef);
        NSString *name = CFBridgingRelease(nameRef);
        if (status != noErr || name == nil) {
            continue;
        }

        CFStringRef uidRef = NULL;
        size = sizeof(uidRef);
        addr.mSelector = kAudioDevicePropertyDeviceUID;
        status = AudioObjectGetPropertyData(deviceID, &addr, 0, NULL, &size, &uidRef);
        NSString *uid = CFBridgingRelease(uidRef);
        if (status != noErr || uid == nil) {
            continue;
        }

        // Both values are non-nil here, so the literal cannot throw
        [devices addObject:@{
            @"name": name,
            @"id": uid
        }];
    }

    free(deviceIDs);

    return devices;
}

/// Returns YES when +availableAudioInputDevices reports at least one device.
+ (BOOL)hasAvailableAudioInputDevice {
    NSArray *inputDevices = [Hear availableAudioInputDevices];
    return inputDevices.count > 0;
}

/// Returns YES when deviceID equals the @"id" entry of one of the devices
/// reported by +availableAudioInputDevices, NO otherwise.
+ (BOOL)isAvailableAudioInputDevice:(NSString *)deviceID {
    NSArray *inputDevices = [Hear availableAudioInputDevices];
    NSUInteger matchIndex = [inputDevices indexOfObjectPassingTest:^BOOL(NSDictionary *entry, NSUInteger idx, BOOL *stop) {
        return [entry[@"id"] isEqualToString:deviceID];
    }];
    return matchIndex != NSNotFound;
}

/// Prints a 1-based numbered list of the devices reported by
/// +availableAudioInputDevices (name and ID per line), or a single notice
/// when the list is empty.
+ (void)printAvailableAudioInputDevices {
    NSArray *inputDevices = [Hear availableAudioInputDevices];

    if (inputDevices.count > 0) {
        NSPrint(@"Available Audio Input Devices:");
        [inputDevices enumerateObjectsUsingBlock:^(NSDictionary *entry, NSUInteger idx, BOOL *stop) {
            // idx is zero-based; the listing is numbered from 1
            NSPrint(@"%lu. %@ (ID: %@)", idx + 1, entry[@"name"], entry[@"id"]);
        }];
        return;
    }

    NSPrint(@"No audio input devices available");
}

#pragma mark - Util

+ (BOOL)isFileSupportedByAVFoundation:(NSString *)filePath {
// Create NSURL from file path
NSURL *fileURL = [NSURL fileURLWithPath:filePath];
Expand Down
Loading