Initial commit: WhisperDictate menu bar app
- Global hotkey (⌃⌥D) for voice dictation - Local Whisper transcription (offline) - Auto-paste to active app - Hungarian language default Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
12
.gitignore
vendored
Normal file
12
.gitignore
vendored
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Build
|
||||||
|
WhisperDictate
|
||||||
|
*.o
|
||||||
|
*.dSYM/
|
||||||
|
|
||||||
|
# macOS
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# Xcode
|
||||||
|
*.xcodeproj/
|
||||||
|
*.xcworkspace/
|
||||||
|
xcuserdata/
|
||||||
90
README.md
Normal file
90
README.md
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
# WhisperDictate
|
||||||
|
|
||||||
|
A simple macOS menu bar app for voice dictation using OpenAI Whisper (local, offline).
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- 🎤 Global hotkey (⌃⌥D) to start/stop recording
|
||||||
|
- 🔒 Fully offline - uses local Whisper model
|
||||||
|
- ⚡ Automatic paste into any app
|
||||||
|
- 🇭🇺 Hungarian language support (configurable)
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- macOS 13.0+
|
||||||
|
- Apple Silicon Mac (M1/M2/M3)
|
||||||
|
- whisper-cpp (`brew install whisper-cpp`)
|
||||||
|
- Whisper model file
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
### 1. Install whisper-cpp
|
||||||
|
|
||||||
|
```bash
|
||||||
|
brew install whisper-cpp
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Download Whisper model
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p ~/.whisper-models
|
||||||
|
curl -L -o ~/.whisper-models/ggml-medium.bin \
|
||||||
|
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Build WhisperDictate
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/YourUsername/WhisperDictate.git
|
||||||
|
cd WhisperDictate
|
||||||
|
swiftc -o WhisperDictate main.swift \
|
||||||
|
-framework Cocoa \
|
||||||
|
-framework AVFoundation \
|
||||||
|
-framework Carbon \
|
||||||
|
-framework CoreGraphics
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./WhisperDictate
|
||||||
|
```
|
||||||
|
|
||||||
|
Or copy to your bin folder:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p ~/bin && cp WhisperDictate ~/bin/
|
||||||
|
~/bin/WhisperDictate &
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Look for the 🎤 icon in your menu bar
|
||||||
|
2. Press **⌃⌥D** (Control + Option + D) to start recording
|
||||||
|
3. Speak (icon changes to 🔴)
|
||||||
|
4. Press **⌃⌥D** again to stop and transcribe
|
||||||
|
5. Text is automatically pasted where your cursor is
|
||||||
|
|
||||||
|
## Audio Feedback
|
||||||
|
|
||||||
|
- 🔔 **Tink** - Recording started
|
||||||
|
- 🔔 **Pop** - Recording stopped, processing
|
||||||
|
- 🔔 **Glass** - Success, text pasted
|
||||||
|
- 🔔 **Basso** - Error
|
||||||
|
|
||||||
|
## Permissions
|
||||||
|
|
||||||
|
The app needs:
|
||||||
|
- **Microphone** access (System Settings → Privacy & Security → Microphone)
|
||||||
|
- **Accessibility** access for auto-paste (System Settings → Privacy & Security → Accessibility)
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Edit `main.swift` to change:
|
||||||
|
- Language: Change `"-l", "hu"` to your language code (e.g., `"en"`, `"de"`)
|
||||||
|
- Hotkey: Modify `registerHotkey()` function
|
||||||
|
- Model: Change `whisperModel` path for different model sizes
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT License
|
||||||
193
main.swift
Normal file
193
main.swift
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
import Cocoa
|
||||||
|
import AVFoundation
|
||||||
|
import Carbon.HIToolbox
|
||||||
|
|
||||||
|
/// Menu bar application delegate for WhisperDictate.
///
/// Pressing the global hotkey (⌃⌥D) or choosing the menu item toggles a
/// microphone recording. When the recording is stopped, the WAV file is passed
/// to a local `whisper-cli` process, the recognized text is placed on the
/// general pasteboard and a ⌘V keystroke is synthesized to paste it into the
/// frontmost application.
class AppDelegate: NSObject, NSApplicationDelegate {
    // MARK: - State

    /// Status bar item; created in `applicationDidFinishLaunching(_:)`.
    var statusItem: NSStatusItem!
    /// Recorder used for the current (or most recent) recording.
    var audioRecorder: AVAudioRecorder?
    /// `true` while the microphone is being recorded.
    var isRecording = false
    /// Path of the WAV file handed to whisper-cli. Placed in the per-user
    /// temporary directory instead of a fixed name in the shared `/tmp`.
    let audioFilePath = URL(fileURLWithPath: NSTemporaryDirectory())
        .appendingPathComponent("whisper-dictate.wav").path
    /// Path of the Whisper model file passed to whisper-cli via `-m`.
    let whisperModel = NSHomeDirectory() + "/.whisper-models/ggml-medium.bin"

    /// `true` from the moment a recording is stopped until its transcription
    /// has been pasted or has failed. Only touched on the main thread.
    private var isTranscribing = false
    /// Handle of the registered global hotkey; `nil` until registration succeeds.
    private var hotKeyRef: EventHotKeyRef?

    // MARK: - NSApplicationDelegate

    /// Builds the status item and its menu, registers the global hotkey and
    /// asks for microphone access.
    func applicationDidFinishLaunching(_ notification: Notification) {
        // Create menu bar item
        statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength)
        statusItem.button?.title = "🎤"

        let menu = NSMenu()
        let toggleItem = NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: "")
        toggleItem.target = self
        menu.addItem(toggleItem)
        menu.addItem(NSMenuItem.separator())
        menu.addItem(NSMenuItem(title: "Quit", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q"))
        statusItem.menu = menu

        // Register global hotkey (Control + Option + D)
        registerHotkey()

        // Request microphone permission
        AVCaptureDevice.requestAccess(for: .audio) { granted in
            guard !granted else { return }
            DispatchQueue.main.async {
                let alert = NSAlert()
                alert.messageText = "Microphone access required"
                alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone"
                alert.runModal()
            }
        }

        NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.")
    }

    // MARK: - Hotkey

    /// Registers ⌃⌥D as a global hotkey that calls `toggleRecording()`.
    ///
    /// Failures reported by the Carbon Event Manager (for example when the
    /// combination cannot be registered) are logged instead of being ignored.
    /// Calling this again after a successful registration is a no-op.
    func registerHotkey() {
        guard hotKeyRef == nil else { return }

        let hotKeyID = EventHotKeyID(signature: OSType(0x57485044), id: 1) // "WHPD"
        let modifiers = UInt32(controlKey | optionKey)
        let keyCode = UInt32(kVK_ANSI_D)

        let registerStatus = RegisterEventHotKey(keyCode, modifiers, hotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef)
        guard registerStatus == noErr else {
            NSLog("Could not register hotkey ⌃⌥D (OSStatus %d)", registerStatus)
            return
        }

        // Install event handler. The handler is a C function pointer and cannot
        // capture `self`, so the delegate is looked up through NSApplication.
        var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed))
        let installStatus = InstallEventHandler(GetApplicationEventTarget(), { _, _, _ -> OSStatus in
            guard let dictationDelegate = NSApplication.shared.delegate as? AppDelegate else {
                return OSStatus(eventNotHandledErr)
            }
            DispatchQueue.main.async {
                dictationDelegate.toggleRecording()
            }
            return noErr
        }, 1, &eventType, nil, nil)
        if installStatus != noErr {
            NSLog("Could not install hotkey handler (OSStatus %d)", installStatus)
        }
    }

    // MARK: - Recording

    /// Starts a recording when idle, or stops the running recording and
    /// transcribes it. Ignored while a transcription is still in progress,
    /// because a new recording would replace the file whisper-cli is reading.
    @objc func toggleRecording() {
        guard !isTranscribing else {
            NSLog("Transcription in progress; ignoring toggle")
            return
        }
        if isRecording {
            stopRecordingAndTranscribe()
        } else {
            startRecording()
        }
    }

    /// Starts recording 16 kHz, mono, 16-bit little-endian PCM to `audioFilePath`.
    ///
    /// Plays "Tink" on success and "Basso" when the recorder cannot be created
    /// or does not start.
    func startRecording() {
        let audioURL = URL(fileURLWithPath: audioFilePath)

        // Remove old file (best effort: it does not exist on the first run).
        try? FileManager.default.removeItem(at: audioURL)

        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false
        ]

        do {
            let recorder = try AVAudioRecorder(url: audioURL, settings: settings)
            // record() reports failure through its return value, not by throwing.
            guard recorder.record() else {
                NSLog("Recording failed: the audio recorder did not start")
                NSSound(named: "Basso")?.play()
                return
            }
            audioRecorder = recorder
            isRecording = true
            statusItem.button?.title = "🔴"
            NSSound(named: "Tink")?.play()
            NSLog("Recording started")
        } catch {
            NSLog("Recording failed: %@", String(describing: error))
            NSSound(named: "Basso")?.play()
        }
    }

    /// Stops the recorder, shows the busy indicator and runs `transcribe()` on
    /// a background queue.
    func stopRecordingAndTranscribe() {
        audioRecorder?.stop()
        isRecording = false
        isTranscribing = true
        statusItem.button?.title = "⏳"
        NSSound(named: "Pop")?.play()
        NSLog("Recording stopped, transcribing...")

        DispatchQueue.global(qos: .userInitiated).async {
            self.transcribe()
        }
    }

    // MARK: - Transcription

    /// Runs whisper-cli on `audioFilePath` and blocks the calling thread until
    /// it exits. The outcome is delivered on the main queue: recognized text is
    /// passed to `pasteText(_:)`; a launch failure, a non-zero exit status or
    /// an empty transcript resets the status item and plays "Basso".
    func transcribe() {
        let task = Process()
        task.executableURL = URL(fileURLWithPath: "/opt/homebrew/bin/whisper-cli")
        task.arguments = ["-m", whisperModel, "-l", "hu", "-f", audioFilePath]

        let pipe = Pipe()
        task.standardOutput = pipe
        task.standardError = FileHandle.nullDevice

        do {
            try task.run()
        } catch {
            let message = "Transcription failed: \(error)"
            DispatchQueue.main.async {
                self.finishWithFailure(message)
            }
            return
        }

        // Drain stdout BEFORE waiting for exit: if the child writes more than
        // the pipe can buffer it blocks until someone reads, and waiting first
        // would then never return.
        let data = pipe.fileHandleForReading.readDataToEndOfFile()
        task.waitUntilExit()

        let exitStatus = task.terminationStatus
        guard exitStatus == 0 else {
            DispatchQueue.main.async {
                self.finishWithFailure("Transcription failed: whisper-cli exited with status \(exitStatus)")
            }
            return
        }

        let result = AppDelegate.transcript(fromWhisperOutput: String(decoding: data, as: UTF8.self))

        DispatchQueue.main.async {
            if result.isEmpty {
                self.finishWithFailure("No speech recognized")
            } else {
                self.pasteText(result)
            }
        }
    }

    /// Extracts the spoken text from whisper-cli output.
    ///
    /// Only lines of the form `[00:00:00.000 --> 00:00:03.000] Hello world` are
    /// used: everything after the first `]` is trimmed, empty segments are
    /// dropped and the remaining segments are joined with single spaces.
    ///
    /// - Parameter output: Raw standard output of whisper-cli.
    /// - Returns: The joined transcript, or an empty string if no segment
    ///   contained text.
    static func transcript(fromWhisperOutput output: String) -> String {
        let segments = output.components(separatedBy: .newlines).compactMap { line -> String? in
            guard line.hasPrefix("["), let closing = line.firstIndex(of: "]") else { return nil }
            let text = line[line.index(after: closing)...].trimmingCharacters(in: .whitespacesAndNewlines)
            return text.isEmpty ? nil : text
        }
        return segments.joined(separator: " ")
    }

    /// Resets the UI after a failed or empty transcription. Main thread only.
    private func finishWithFailure(_ message: String) {
        isTranscribing = false
        statusItem.button?.title = "🎤"
        NSSound(named: "Basso")?.play()
        // Pass the message as an argument, never as the format string.
        NSLog("%@", message)
    }

    // MARK: - Pasting

    /// Puts `text` on the general pasteboard and, after a short delay,
    /// synthesizes ⌘V. Plays "Glass" once the key events have been posted, or
    /// "Basso" if they could not be created (the text then stays on the
    /// pasteboard).
    ///
    /// - Parameter text: The transcript to paste.
    func pasteText(_ text: String) {
        // Copy to clipboard
        let pasteboard = NSPasteboard.general
        pasteboard.clearContents()
        pasteboard.setString(text, forType: .string)

        // The transcript may contain '%', so it must not be the format string.
        NSLog("Transcribed: %@", text)

        // Simulate Cmd+V
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
            self.isTranscribing = false
            self.statusItem.button?.title = "🎤"

            let source = CGEventSource(stateID: .hidSystemState)
            let vKey = CGKeyCode(kVK_ANSI_V)
            guard let keyDown = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: true),
                  let keyUp = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: false) else {
                NSLog("Could not create paste key events; the text is on the clipboard")
                NSSound(named: "Basso")?.play()
                return
            }

            keyDown.flags = .maskCommand
            keyUp.flags = .maskCommand
            keyDown.post(tap: .cghidEventTap)
            keyUp.post(tap: .cghidEventTap)

            NSSound(named: "Glass")?.play()
        }
    }
}
|
||||||
|
|
||||||
|
// MARK: - Entry point

// Top-level script: create the shared application and its delegate, configure
// the app as a menu-bar-only accessory, then enter the run loop.
let application = NSApplication.shared
let appDelegate = AppDelegate()

// Accessory policy: menu bar only, no dock icon.
application.setActivationPolicy(.accessory)
application.delegate = appDelegate

application.run()
|
||||||
Reference in New Issue
Block a user