Initial commit: WhisperDictate menu bar app
- Global hotkey (⌃⌥D) for voice dictation - Local Whisper transcription (offline) - Auto-paste to active app - Hungarian language default Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
12
.gitignore
vendored
Normal file
12
.gitignore
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# Build
|
||||
WhisperDictate
|
||||
*.o
|
||||
*.dSYM/
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
|
||||
# Xcode
|
||||
*.xcodeproj/
|
||||
*.xcworkspace/
|
||||
xcuserdata/
|
||||
90
README.md
Normal file
90
README.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# WhisperDictate
|
||||
|
||||
A simple macOS menu bar app for voice dictation using OpenAI Whisper (local, offline).
|
||||
|
||||
## Features
|
||||
|
||||
- 🎤 Global hotkey (⌃⌥D) to start/stop recording
|
||||
- 🔒 Fully offline - uses local Whisper model
|
||||
- ⚡ Automatic paste into any app
|
||||
- 🇭🇺 Hungarian transcription by default (language is configurable)
|
||||
|
||||
## Requirements
|
||||
|
||||
- macOS 13.0+
|
||||
- Apple Silicon Mac (M1/M2/M3)
|
||||
- whisper-cpp (`brew install whisper-cpp`)
|
||||
- Whisper model file
|
||||
|
||||
## Installation
|
||||
|
||||
### 1. Install whisper-cpp
|
||||
|
||||
```bash
|
||||
brew install whisper-cpp sox
|
||||
```
|
||||
|
||||
### 2. Download Whisper model
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.whisper-models
|
||||
curl -L -o ~/.whisper-models/ggml-medium.bin \
|
||||
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin"
|
||||
```
|
||||
|
||||
### 3. Build WhisperDictate
|
||||
|
||||
```bash
|
||||
git clone https://github.com/YourUsername/WhisperDictate.git
|
||||
cd WhisperDictate
|
||||
swiftc -o WhisperDictate main.swift \
|
||||
-framework Cocoa \
|
||||
-framework AVFoundation \
|
||||
-framework Carbon \
|
||||
-framework CoreGraphics
|
||||
```
|
||||
|
||||
### 4. Run
|
||||
|
||||
```bash
|
||||
./WhisperDictate
|
||||
```
|
||||
|
||||
Or copy it to your `~/bin` folder and run it in the background:
|
||||
|
||||
```bash
|
||||
cp WhisperDictate ~/bin/
|
||||
~/bin/WhisperDictate &
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
1. Look for the 🎤 icon in your menu bar
|
||||
2. Press **⌃⌥D** (Control + Option + D) to start recording
|
||||
3. Speak (icon changes to 🔴)
|
||||
4. Press **⌃⌥D** again to stop and transcribe
|
||||
5. Text is automatically pasted where your cursor is
|
||||
|
||||
## Audio Feedback
|
||||
|
||||
- 🔔 **Tink** - Recording started
|
||||
- 🔔 **Pop** - Recording stopped, processing
|
||||
- 🔔 **Glass** - Success, text pasted
|
||||
- 🔔 **Basso** - Error
|
||||
|
||||
## Permissions
|
||||
|
||||
The app needs:
|
||||
- **Microphone** access (System Settings → Privacy & Security → Microphone)
|
||||
- **Accessibility** access for auto-paste (System Settings → Privacy & Security → Accessibility)
|
||||
|
||||
## Configuration
|
||||
|
||||
Edit `main.swift` to change:
|
||||
- Language: Change `"-l", "hu"` to your language code (e.g., `"en"`, `"de"`)
|
||||
- Hotkey: Modify the `registerHotkey()` function
|
||||
- Model: Change `whisperModel` path for different model sizes
|
||||
|
||||
## License
|
||||
|
||||
MIT License
|
||||
193
main.swift
Normal file
193
main.swift
Normal file
@@ -0,0 +1,193 @@
|
||||
import Cocoa
|
||||
import AVFoundation
|
||||
import Carbon.HIToolbox
|
||||
|
||||
/// Menu bar application delegate that records microphone audio on a global
/// hotkey, transcribes it with the local `whisper-cli` binary, and pastes the
/// resulting text into the frontmost application.
class AppDelegate: NSObject, NSApplicationDelegate {
    /// Status bar item whose title reflects the current state (🎤 idle, 🔴 recording, ⏳ transcribing).
    var statusItem: NSStatusItem!
    /// Recorder for the capture in progress; `nil` while not recording.
    var audioRecorder: AVAudioRecorder?
    /// True while audio is being captured.
    var isRecording = false
    /// Location of the WAV file handed to whisper-cli.
    let audioFilePath = "/tmp/whisper-dictate.wav"
    /// Path of the Whisper model file passed to whisper-cli with `-m`.
    let whisperModel = NSHomeDirectory() + "/.whisper-models/ggml-medium.bin"

    /// True from the moment a recording is handed to whisper-cli until its
    /// result has been received. Only read and written on the main queue.
    private var isTranscribing = false

    /// Builds the status bar menu, registers the global hotkey, and asks for
    /// microphone access.
    func applicationDidFinishLaunching(_ notification: Notification) {
        // Create menu bar item
        statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength)
        statusItem.button?.title = "🎤"

        let menu = NSMenu()
        menu.addItem(NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: ""))
        menu.addItem(NSMenuItem.separator())
        menu.addItem(NSMenuItem(title: "Quit", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q"))
        statusItem.menu = menu

        // Register global hotkey (Control + Option + D)
        registerHotkey()

        // Request microphone permission; the completion handler may run off
        // the main thread, so the alert is shown via the main queue.
        AVCaptureDevice.requestAccess(for: .audio) { granted in
            guard !granted else { return }
            DispatchQueue.main.async {
                let alert = NSAlert()
                alert.messageText = "Microphone access required"
                alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone"
                alert.runModal()
            }
        }

        NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.")
    }

    /// Registers Control + Option + D as a global hotkey and installs the
    /// Carbon event handler that toggles recording when it is pressed.
    ///
    /// Failures are logged instead of being silently ignored; the menu item
    /// remains usable even if the hotkey could not be registered.
    func registerHotkey() {
        var hotKeyRef: EventHotKeyRef?
        var hotKeyID = EventHotKeyID()
        hotKeyID.signature = OSType(0x57485044) // "WHPD"
        hotKeyID.id = 1

        let modifiers = UInt32(controlKey | optionKey)
        let keyCode = UInt32(kVK_ANSI_D) // D key (virtual key code 2)

        let registerStatus = RegisterEventHotKey(keyCode, modifiers, hotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef)
        guard registerStatus == noErr else {
            NSLog("Failed to register hotkey ⌃⌥D (OSStatus %d)", Int32(registerStatus))
            return
        }

        // The handler is a C function pointer and cannot capture `self`, so the
        // delegate is handed over through the user-data pointer rather than
        // force-casting NSApplication.shared.delegate.
        var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed))
        let installStatus = InstallEventHandler(GetApplicationEventTarget(), { (_, _, userData) -> OSStatus in
            guard let userData = userData else {
                return OSStatus(eventNotHandledErr)
            }
            let delegate = Unmanaged<AppDelegate>.fromOpaque(userData).takeUnretainedValue()
            DispatchQueue.main.async {
                delegate.toggleRecording()
            }
            return noErr
        }, 1, &eventType, Unmanaged.passUnretained(self).toOpaque(), nil)

        if installStatus != noErr {
            NSLog("Failed to install hotkey handler (OSStatus %d)", Int32(installStatus))
        }
    }

    /// Starts a recording when idle, or stops the current one and transcribes it.
    ///
    /// Toggles are rejected while a transcription is running, because starting
    /// a new recording would delete and overwrite the file whisper-cli is reading.
    @objc func toggleRecording() {
        guard !isTranscribing else {
            NSLog("Ignoring toggle: transcription still in progress")
            NSSound(named: "Basso")?.play()
            return
        }

        if isRecording {
            stopRecordingAndTranscribe()
        } else {
            startRecording()
        }
    }

    /// Begins capturing 16 kHz, mono, 16-bit little-endian PCM audio to `audioFilePath`.
    ///
    /// The recording state and status icon only change once the recorder has
    /// actually started; otherwise the error sound is played.
    func startRecording() {
        let audioURL = URL(fileURLWithPath: audioFilePath)

        // Remove old file (best effort: it may simply not exist yet)
        try? FileManager.default.removeItem(at: audioURL)

        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false
        ]

        do {
            let recorder = try AVAudioRecorder(url: audioURL, settings: settings)
            // record() reports failure through its Bool result, not by throwing.
            guard recorder.record() else {
                NSLog("Recording failed: recorder could not be started")
                NSSound(named: "Basso")?.play()
                return
            }
            audioRecorder = recorder
            isRecording = true
            statusItem.button?.title = "🔴"
            NSSound(named: "Tink")?.play()
            NSLog("Recording started")
        } catch {
            NSLog("Recording failed: %@", "\(error)")
            NSSound(named: "Basso")?.play()
        }
    }

    /// Stops the recorder and runs the transcription on a background queue.
    func stopRecordingAndTranscribe() {
        audioRecorder?.stop()
        audioRecorder = nil
        isRecording = false
        isTranscribing = true
        statusItem.button?.title = "⏳"
        NSSound(named: "Pop")?.play()
        NSLog("Recording stopped, transcribing...")

        DispatchQueue.global(qos: .userInitiated).async {
            self.transcribe()
        }
    }

    /// Runs whisper-cli on the recorded file and pastes the recognized text.
    ///
    /// Blocks the calling thread until whisper-cli exits, so it should be
    /// called off the main thread. All UI updates are dispatched to the main queue.
    func transcribe() {
        let task = Process()
        task.executableURL = URL(fileURLWithPath: "/opt/homebrew/bin/whisper-cli")
        task.arguments = ["-m", whisperModel, "-l", "hu", "-f", audioFilePath]

        let pipe = Pipe()
        task.standardOutput = pipe
        task.standardError = FileHandle.nullDevice

        let output: String
        do {
            try task.run()
            // Drain stdout BEFORE waiting for exit: if the child fills the pipe
            // buffer while nobody reads, it blocks forever and so would we.
            let data = pipe.fileHandleForReading.readDataToEndOfFile()
            task.waitUntilExit()
            output = String(data: data, encoding: .utf8) ?? ""
        } catch {
            reportTranscriptionFailure("Transcription failed: \(error)")
            return
        }

        guard task.terminationStatus == 0 else {
            reportTranscriptionFailure("Transcription failed: whisper-cli exited with status \(task.terminationStatus)")
            return
        }

        let result = AppDelegate.transcriptText(fromWhisperOutput: output)
        guard !result.isEmpty else {
            reportTranscriptionFailure("No speech recognized")
            return
        }

        DispatchQueue.main.async {
            self.isTranscribing = false
            self.pasteText(result)
        }
    }

    /// Extracts the spoken text from whisper-cli output.
    ///
    /// Only lines that start with `[` are considered, e.g.
    /// `[00:00:00.000 --> 00:00:03.000] Hello world`; everything after the
    /// first `]` is kept. Segments are trimmed, empty ones are dropped, and
    /// the rest are joined with single spaces.
    ///
    /// - Parameter output: Raw standard output of whisper-cli.
    /// - Returns: The concatenated transcript, or an empty string if no
    ///   segment line carried any text.
    static func transcriptText(fromWhisperOutput output: String) -> String {
        return output
            .components(separatedBy: .newlines)
            .compactMap { line -> String? in
                guard line.hasPrefix("["), let bracket = line.range(of: "]") else {
                    return nil
                }
                let text = line[bracket.upperBound...].trimmingCharacters(in: .whitespaces)
                return text.isEmpty ? nil : text
            }
            .joined(separator: " ")
    }

    /// Copies `text` to the general pasteboard and simulates ⌘V so that it is
    /// pasted into the frontmost application.
    ///
    /// - Parameter text: The transcript to paste.
    func pasteText(_ text: String) {
        // Copy to clipboard
        let pasteboard = NSPasteboard.general
        pasteboard.clearContents()
        pasteboard.setString(text, forType: .string)

        // Pass the transcript as an argument: it may contain '%' characters
        // that must not be interpreted as format specifiers.
        NSLog("Transcribed: %@", text)

        // Without Accessibility permission the paste cannot be simulated;
        // the text stays on the clipboard so the user can paste manually.
        guard AXIsProcessTrusted() else {
            NSLog("Auto-paste unavailable: Accessibility access not granted (text left on clipboard)")
            resetStatusTitle()
            NSSound(named: "Basso")?.play()
            return
        }

        // Simulate Cmd+V after a short delay
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
            let source = CGEventSource(stateID: .hidSystemState)
            let vKey = CGKeyCode(kVK_ANSI_V) // V key (virtual key code 0x09)

            let keyDown = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: true)
            keyDown?.flags = .maskCommand
            keyDown?.post(tap: .cghidEventTap)

            let keyUp = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: false)
            keyUp?.flags = .maskCommand
            keyUp?.post(tap: .cghidEventTap)

            self.resetStatusTitle()
            NSSound(named: "Glass")?.play()
        }
    }

    /// Shows the recording icon if a capture is running, otherwise the idle icon.
    private func resetStatusTitle() {
        statusItem.button?.title = isRecording ? "🔴" : "🎤"
    }

    /// Ends the transcription phase with an error: restores the status icon,
    /// plays the error sound, and logs `message`. Safe to call from any thread.
    private func reportTranscriptionFailure(_ message: String) {
        DispatchQueue.main.async {
            self.isTranscribing = false
            self.resetStatusTitle()
            NSSound(named: "Basso")?.play()
            NSLog("%@", message)
        }
    }
}
|
||||
|
||||
// Entry point: create the shared application, attach the delegate, and run as
// an accessory app so that only the menu bar item is shown.
let application = NSApplication.shared
// Kept in a top-level constant so the delegate object stays referenced for the
// whole run.
let appDelegate = AppDelegate()
application.delegate = appDelegate
application.setActivationPolicy(.accessory) // Menu bar only, no dock icon
application.run()
|
||||
Reference in New Issue
Block a user