Initial commit: WhisperDictate menu bar app

- Global hotkey (⌃⌥D) for voice dictation
- Local Whisper transcription (offline)
- Auto-paste to active app
- Hungarian language default

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
hariel1985
2026-02-02 12:09:39 +01:00
commit 3d9e798f02
3 fájl változott, egészen pontosan 295 új sor hozzáadva és 0 régi sor törölve

193
main.swift Normal file
Fájl megtekintése

@@ -0,0 +1,193 @@
import Cocoa
import AVFoundation
import Carbon.HIToolbox
class AppDelegate: NSObject, NSApplicationDelegate {
var statusItem: NSStatusItem!
var audioRecorder: AVAudioRecorder?
var isRecording = false
let audioFilePath = "/tmp/whisper-dictate.wav"
let whisperModel = NSHomeDirectory() + "/.whisper-models/ggml-medium.bin"
func applicationDidFinishLaunching(_ notification: Notification) {
// Create menu bar item
statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength)
statusItem.button?.title = "🎤"
let menu = NSMenu()
menu.addItem(NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: ""))
menu.addItem(NSMenuItem.separator())
menu.addItem(NSMenuItem(title: "Quit", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q"))
statusItem.menu = menu
// Register global hotkey (Control + Option + D)
registerHotkey()
// Request microphone permission
AVCaptureDevice.requestAccess(for: .audio) { granted in
if !granted {
DispatchQueue.main.async {
let alert = NSAlert()
alert.messageText = "Microphone access required"
alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone"
alert.runModal()
}
}
}
NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.")
}
func registerHotkey() {
// Register Control + Option + D
var hotKeyRef: EventHotKeyRef?
var gMyHotKeyID = EventHotKeyID()
gMyHotKeyID.signature = OSType(0x57485044) // "WHPD"
gMyHotKeyID.id = 1
// D = 2, Control = 0x1000, Option = 0x0800
let modifiers: UInt32 = UInt32(controlKey | optionKey)
let keyCode: UInt32 = 2 // D key
RegisterEventHotKey(keyCode, modifiers, gMyHotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef)
// Install event handler
var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed))
InstallEventHandler(GetApplicationEventTarget(), { (_, event, _) -> OSStatus in
let appDelegate = NSApplication.shared.delegate as! AppDelegate
DispatchQueue.main.async {
appDelegate.toggleRecording()
}
return noErr
}, 1, &eventType, nil, nil)
}
@objc func toggleRecording() {
if isRecording {
stopRecordingAndTranscribe()
} else {
startRecording()
}
}
func startRecording() {
let audioURL = URL(fileURLWithPath: audioFilePath)
// Remove old file
try? FileManager.default.removeItem(at: audioURL)
let settings: [String: Any] = [
AVFormatIDKey: Int(kAudioFormatLinearPCM),
AVSampleRateKey: 16000,
AVNumberOfChannelsKey: 1,
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsBigEndianKey: false
]
do {
audioRecorder = try AVAudioRecorder(url: audioURL, settings: settings)
audioRecorder?.record()
isRecording = true
statusItem.button?.title = "🔴"
NSSound(named: "Tink")?.play()
NSLog("Recording started")
} catch {
NSLog("Recording failed: \(error)")
NSSound(named: "Basso")?.play()
}
}
func stopRecordingAndTranscribe() {
audioRecorder?.stop()
isRecording = false
statusItem.button?.title = ""
NSSound(named: "Pop")?.play()
NSLog("Recording stopped, transcribing...")
DispatchQueue.global(qos: .userInitiated).async {
self.transcribe()
}
}
func transcribe() {
let task = Process()
task.executableURL = URL(fileURLWithPath: "/opt/homebrew/bin/whisper-cli")
task.arguments = ["-m", whisperModel, "-l", "hu", "-f", audioFilePath]
let pipe = Pipe()
task.standardOutput = pipe
task.standardError = FileHandle.nullDevice
do {
try task.run()
task.waitUntilExit()
let data = pipe.fileHandleForReading.readDataToEndOfFile()
let output = String(data: data, encoding: .utf8) ?? ""
// Parse output - extract text from lines like "[00:00:00.000 --> 00:00:03.000] Hello world"
let lines = output.components(separatedBy: "\n")
var result = ""
for line in lines {
if line.hasPrefix("[") {
if let range = line.range(of: "]") {
let text = String(line[range.upperBound...]).trimmingCharacters(in: .whitespaces)
result += text + " "
}
}
}
result = result.trimmingCharacters(in: .whitespaces)
DispatchQueue.main.async {
if !result.isEmpty {
self.pasteText(result)
} else {
self.statusItem.button?.title = "🎤"
NSSound(named: "Basso")?.play()
NSLog("No speech recognized")
}
}
} catch {
DispatchQueue.main.async {
self.statusItem.button?.title = "🎤"
NSSound(named: "Basso")?.play()
NSLog("Transcription failed: \(error)")
}
}
}
func pasteText(_ text: String) {
// Copy to clipboard
let pasteboard = NSPasteboard.general
pasteboard.clearContents()
pasteboard.setString(text, forType: .string)
NSLog("Transcribed: \(text)")
// Simulate Cmd+V
DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
let source = CGEventSource(stateID: .hidSystemState)
// Key down
let keyDown = CGEvent(keyboardEventSource: source, virtualKey: 0x09, keyDown: true) // V key
keyDown?.flags = .maskCommand
keyDown?.post(tap: .cghidEventTap)
// Key up
let keyUp = CGEvent(keyboardEventSource: source, virtualKey: 0x09, keyDown: false)
keyUp?.flags = .maskCommand
keyUp?.post(tap: .cghidEventTap)
self.statusItem.button?.title = "🎤"
NSSound(named: "Glass")?.play()
}
}
}
// Main
let app = NSApplication.shared
let delegate = AppDelegate()
app.delegate = delegate
app.setActivationPolicy(.accessory) // Menu bar only, no dock icon
app.run()