From 3d9e798f0288f1e1980d717c919a46c7f6d93bd0 Mon Sep 17 00:00:00 2001 From: hariel1985 Date: Mon, 2 Feb 2026 12:09:39 +0100 Subject: [PATCH] Initial commit: WhisperDictate menu bar app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Global hotkey (⌃⌥D) for voice dictation - Local Whisper transcription (offline) - Auto-paste to active app - Hungarian language default Co-Authored-By: Claude Opus 4.5 --- .gitignore | 12 ++++ README.md | 90 +++++++++++++++++++++++++ main.swift | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 main.swift diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c91e846 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Build +WhisperDictate +*.o +*.dSYM/ + +# macOS +.DS_Store + +# Xcode +*.xcodeproj/ +*.xcworkspace/ +xcuserdata/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..fd93786 --- /dev/null +++ b/README.md @@ -0,0 +1,90 @@ +# WhisperDictate + +A simple macOS menu bar app for voice dictation using OpenAI Whisper (local, offline). + +## Features + +- 🎤 Global hotkey (⌃⌥D) to start/stop recording +- 🔒 Fully offline - uses local Whisper model +- ⚡ Automatic paste into any app +- 🇭🇺 Hungarian language support (configurable) + +## Requirements + +- macOS 13.0+ +- Apple Silicon Mac (M1/M2/M3) +- whisper-cpp (`brew install whisper-cpp`) +- Whisper model file + +## Installation + +### 1. Install whisper-cpp + +```bash +brew install whisper-cpp +``` + +### 2. Download Whisper model + +```bash +mkdir -p ~/.whisper-models +curl -L -o ~/.whisper-models/ggml-medium.bin \ + "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin" +``` + +### 3. 
Build WhisperDictate
+
+```bash
+git clone https://github.com/YourUsername/WhisperDictate.git
+cd WhisperDictate
+swiftc -o WhisperDictate main.swift \
+  -framework Cocoa \
+  -framework AVFoundation \
+  -framework Carbon \
+  -framework CoreGraphics
+```
+
+### 4. Run
+
+```bash
+./WhisperDictate
+```
+
+Or copy to your bin folder:
+
+```bash
+cp WhisperDictate ~/bin/
+~/bin/WhisperDictate &
+```
+
+## Usage
+
+1. Look for the 🎤 icon in your menu bar
+2. Press **⌃⌥D** (Control + Option + D) to start recording
+3. Speak (icon changes to 🔴)
+4. Press **⌃⌥D** again to stop and transcribe
+5. Text is automatically pasted where your cursor is
+
+## Audio Feedback
+
+- 🔔 **Tink** - Recording started
+- 🔔 **Pop** - Recording stopped, processing
+- 🔔 **Glass** - Success, text pasted
+- 🔔 **Basso** - Error
+
+## Permissions
+
+The app needs:
+- **Microphone** access (System Settings → Privacy & Security → Microphone)
+- **Accessibility** access for auto-paste (System Settings → Privacy & Security → Accessibility)
+
+## Configuration
+
+Edit `main.swift` to change:
+- Language: Change `"-l", "hu"` to your language code (e.g., `"en"`, `"de"`)
+- Hotkey: Modify `registerHotkey()` function
+- Model: Change `whisperModel` path for different model sizes
+
+## License
+
+MIT License
diff --git a/main.swift b/main.swift
new file mode 100644
index 0000000..744966b
--- /dev/null
+++ b/main.swift
@@ -0,0 +1,256 @@
+import Cocoa
+import AVFoundation
+import Carbon.HIToolbox
+
+/// Menu bar app delegate: records speech while a global hotkey toggle is on,
+/// runs the local `whisper-cli` on the recording and pastes the transcript
+/// into the frontmost application.
+final class AppDelegate: NSObject, NSApplicationDelegate {
+    /// Failures specific to running the external transcriber.
+    enum TranscriptionError: Error {
+        case whisperFailed(status: Int32)
+    }
+
+    var statusItem: NSStatusItem!
+    var audioRecorder: AVAudioRecorder?
+    var isRecording = false
+    /// True while whisper-cli is still reading the recording, so a new
+    /// recording cannot delete or overwrite the file underneath it.
+    private var isTranscribing = false
+    /// Kept for the lifetime of the app so the registration has an owner.
+    private var hotKeyRef: EventHotKeyRef?
+    // Per-user temporary directory rather than a fixed name in shared /tmp.
+    let audioFilePath = URL(fileURLWithPath: NSTemporaryDirectory())
+        .appendingPathComponent("whisper-dictate.wav").path
+    let whisperModel = NSHomeDirectory() + "/.whisper-models/ggml-medium.bin"
+
+    /// Builds the status item and menu, registers the hotkey and asks for
+    /// microphone access.
+    func applicationDidFinishLaunching(_ notification: Notification) {
+        // Create menu bar item
+        statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength)
+        statusItem.button?.title = "🎤"
+
+        let menu = NSMenu()
+        menu.addItem(NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: ""))
+        menu.addItem(NSMenuItem.separator())
+        menu.addItem(NSMenuItem(title: "Quit", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q"))
+        statusItem.menu = menu
+
+        // Register global hotkey (Control + Option + D)
+        registerHotkey()
+
+        // Request microphone permission
+        AVCaptureDevice.requestAccess(for: .audio) { granted in
+            guard !granted else { return }
+            DispatchQueue.main.async {
+                let alert = NSAlert()
+                alert.messageText = "Microphone access required"
+                alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone"
+                alert.runModal()
+            }
+        }
+
+        NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.")
+    }
+
+    /// Installs the Carbon hotkey handler and registers Control + Option + D.
+    /// Failures are logged; the menu item keeps working without the hotkey.
+    func registerHotkey() {
+        var hotKeyID = EventHotKeyID()
+        hotKeyID.signature = OSType(0x57485044) // "WHPD"
+        hotKeyID.id = 1
+
+        let modifiers = UInt32(controlKey | optionKey)
+        let keyCode = UInt32(kVK_ANSI_D)
+
+        // Install the handler first so the hotkey is never live without one.
+        var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed))
+        let handlerStatus = InstallEventHandler(GetApplicationEventTarget(), { (_, _, _) -> OSStatus in
+            // A C callback cannot capture `self`; look the delegate up and
+            // decline the event instead of crashing if it is not ours.
+            guard let appDelegate = NSApplication.shared.delegate as? AppDelegate else {
+                return OSStatus(eventNotHandledErr)
+            }
+            DispatchQueue.main.async {
+                appDelegate.toggleRecording()
+            }
+            return noErr
+        }, 1, &eventType, nil, nil)
+        guard handlerStatus == noErr else {
+            NSLog("%@", "Could not install hotkey handler (OSStatus \(handlerStatus))")
+            return
+        }
+
+        let registerStatus = RegisterEventHotKey(keyCode, modifiers, hotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef)
+        if registerStatus != noErr {
+            NSLog("%@", "Could not register hotkey ⌃⌥D (OSStatus \(registerStatus))")
+        }
+    }
+
+    /// Starts a recording, or stops the running one and transcribes it.
+    /// Requests made while a transcription is in flight are rejected.
+    @objc func toggleRecording() {
+        if isRecording {
+            stopRecordingAndTranscribe()
+        } else if isTranscribing {
+            NSSound(named: "Basso")?.play()
+            NSLog("Still transcribing, ignoring toggle")
+        } else {
+            startRecording()
+        }
+    }
+
+    /// Begins capturing 16 kHz mono 16-bit PCM audio into `audioFilePath`.
+    func startRecording() {
+        let audioURL = URL(fileURLWithPath: audioFilePath)
+
+        // Remove old file (best effort: it usually does not exist)
+        try? FileManager.default.removeItem(at: audioURL)
+
+        let settings: [String: Any] = [
+            AVFormatIDKey: Int(kAudioFormatLinearPCM),
+            AVSampleRateKey: 16000,
+            AVNumberOfChannelsKey: 1,
+            AVLinearPCMBitDepthKey: 16,
+            AVLinearPCMIsFloatKey: false,
+            AVLinearPCMIsBigEndianKey: false
+        ]
+
+        do {
+            let recorder = try AVAudioRecorder(url: audioURL, settings: settings)
+            // record() signals failure through its Bool result, not by throwing.
+            guard recorder.record() else {
+                reportFailure("Recording failed: recorder did not start")
+                return
+            }
+            audioRecorder = recorder
+            isRecording = true
+            statusItem.button?.title = "🔴"
+            NSSound(named: "Tink")?.play()
+            NSLog("Recording started")
+        } catch {
+            reportFailure("Recording failed: \(error)")
+        }
+    }
+
+    /// Stops the recorder and hands the file to `transcribe()` off the main thread.
+    func stopRecordingAndTranscribe() {
+        audioRecorder?.stop()
+        audioRecorder = nil
+        isRecording = false
+        isTranscribing = true
+        statusItem.button?.title = "⏳"
+        NSSound(named: "Pop")?.play()
+        NSLog("Recording stopped, transcribing...")
+
+        DispatchQueue.global(qos: .userInitiated).async {
+            self.transcribe()
+        }
+    }
+
+    /// Runs whisper-cli on the recording, then pastes the transcript or
+    /// reports the failure on the main queue. Blocks the calling thread.
+    func transcribe() {
+        let outcome: Result<String, Error> = Result(catching: { try runWhisper() })
+
+        DispatchQueue.main.async {
+            self.isTranscribing = false
+            switch outcome {
+            case .success(let text) where !text.isEmpty:
+                self.pasteText(text)
+            case .success:
+                self.reportFailure("No speech recognized")
+            case .failure(let error):
+                self.reportFailure("Transcription failed: \(error)")
+            }
+        }
+    }
+
+    /// Executes whisper-cli and returns the transcript parsed from its stdout.
+    /// - Throws: the launch error, or `TranscriptionError.whisperFailed` when
+    ///   the process exits with a non-zero status.
+    private func runWhisper() throws -> String {
+        let task = Process()
+        task.executableURL = URL(fileURLWithPath: "/opt/homebrew/bin/whisper-cli")
+        task.arguments = ["-m", whisperModel, "-l", "hu", "-f", audioFilePath]
+
+        let pipe = Pipe()
+        task.standardOutput = pipe
+        task.standardError = FileHandle.nullDevice
+
+        try task.run()
+        // Drain stdout before waiting: a child that fills the pipe buffer
+        // blocks on write and would never exit if we waited first.
+        let data = pipe.fileHandleForReading.readDataToEndOfFile()
+        task.waitUntilExit()
+
+        guard task.terminationStatus == 0 else {
+            throw TranscriptionError.whisperFailed(status: task.terminationStatus)
+        }
+        return AppDelegate.transcriptText(from: String(decoding: data, as: UTF8.self))
+    }
+
+    /// Extracts the spoken text from whisper-cli output lines such as
+    /// "[00:00:00.000 --> 00:00:03.000] Hello world", joined with spaces.
+    static func transcriptText(from output: String) -> String {
+        let segments = output.components(separatedBy: "\n").compactMap { line -> String? in
+            guard line.hasPrefix("["), let bracket = line.range(of: "]") else { return nil }
+            let text = line[bracket.upperBound...].trimmingCharacters(in: .whitespaces)
+            return text.isEmpty ? nil : text
+        }
+        return segments.joined(separator: " ")
+    }
+
+    /// Puts `text` on the general pasteboard and simulates ⌘V to paste it.
+    func pasteText(_ text: String) {
+        // Copy to clipboard
+        let pasteboard = NSPasteboard.general
+        pasteboard.clearContents()
+        pasteboard.setString(text, forType: .string)
+
+        // Log only the size: dictated text may be private, and passing it
+        // as the NSLog format would misread any "%" it contains.
+        NSLog("%@", "Transcribed \(text.count) characters")
+
+        // The paste needs Accessibility access; without it keep the text
+        // on the clipboard and signal the error instead of false success.
+        guard AXIsProcessTrusted() else {
+            reportFailure("Accessibility access not granted: text copied to clipboard, not pasted")
+            return
+        }
+
+        // Simulate Cmd+V
+        DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
+            let source = CGEventSource(stateID: .hidSystemState)
+            let vKey = CGKeyCode(kVK_ANSI_V)
+            guard let keyDown = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: true),
+                  let keyUp = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: false) else {
+                self.reportFailure("Could not create paste key events")
+                return
+            }
+            keyDown.flags = .maskCommand
+            keyUp.flags = .maskCommand
+            keyDown.post(tap: .cghidEventTap)
+            keyUp.post(tap: .cghidEventTap)
+
+            self.statusItem.button?.title = "🎤"
+            NSSound(named: "Glass")?.play()
+        }
+    }
+
+    /// Resets the menu bar icon, plays the error sound and logs `message`.
+    private func reportFailure(_ message: String) {
+        statusItem.button?.title = "🎤"
+        NSSound(named: "Basso")?.play()
+        // Constant format string: `message` may contain "%" characters.
+        NSLog("%@", message)
+    }
+}
+
+// Main
+let app = NSApplication.shared
+let delegate = AppDelegate()
+app.delegate = delegate
+app.setActivationPolicy(.accessory) // Menu bar only, no dock icon
+app.run()