From 1727f4ba5b9f4023a05a5bef8775ada876ea0c16 Mon Sep 17 00:00:00 2001 From: hariel1985 Date: Mon, 2 Feb 2026 12:31:42 +0100 Subject: [PATCH] Restructure for multi-platform support and add Settings UI - Move macOS code to macos/ directory for platform separation - Add Settings window with configurable language, model path, and sound toggle - Add launch at login support using SMAppService - Add proper .app bundle structure with Info.plist - Add Makefile for build, install, run, and dmg targets - Store preferences in UserDefaults for persistence Co-Authored-By: Claude Opus 4.5 --- macos/Makefile | 51 +++ macos/WhisperDictate.app/Contents/Info.plist | 36 ++ macos/src/main.swift | 412 +++++++++++++++++++ main.swift | 193 --------- 4 files changed, 499 insertions(+), 193 deletions(-) create mode 100644 macos/Makefile create mode 100644 macos/WhisperDictate.app/Contents/Info.plist create mode 100644 macos/src/main.swift delete mode 100644 main.swift diff --git a/macos/Makefile b/macos/Makefile new file mode 100644 index 0000000..bedb241 --- /dev/null +++ b/macos/Makefile @@ -0,0 +1,51 @@ +# WhisperDictate macOS Build + +APP_NAME = WhisperDictate +APP_BUNDLE = $(APP_NAME).app +VERSION = 1.0.0 + +# Directories +SRC_DIR = src +BUILD_DIR = build +BUNDLE_DIR = $(APP_BUNDLE)/Contents + +# Compiler settings +SWIFT = swiftc +SWIFT_FLAGS = -O -framework Cocoa -framework AVFoundation -framework Carbon -framework CoreGraphics -framework ServiceManagement + +.PHONY: all clean build install run dmg + +all: build + +build: $(BUILD_DIR)/$(APP_NAME) + @echo "✓ Build complete" + +$(BUILD_DIR)/$(APP_NAME): $(SRC_DIR)/main.swift + @mkdir -p $(BUILD_DIR) + @mkdir -p $(BUNDLE_DIR)/MacOS + @mkdir -p $(BUNDLE_DIR)/Resources + $(SWIFT) $(SWIFT_FLAGS) -o $(BUNDLE_DIR)/MacOS/$(APP_NAME) $(SRC_DIR)/main.swift + @cp $(APP_BUNDLE)/Contents/Info.plist $(BUNDLE_DIR)/ 2>/dev/null || true + @touch $(BUILD_DIR)/$(APP_NAME) + @echo "✓ Built $(APP_BUNDLE)" + +clean: + rm -rf $(BUILD_DIR) + rm -rf $(BUNDLE_DIR)/MacOS/$(APP_NAME) + @echo "✓ Cleaned" + +install: build + @rm -rf /Applications/$(APP_BUNDLE) + @cp -R $(APP_BUNDLE) /Applications/ + @codesign --force --deep --sign - /Applications/$(APP_BUNDLE) + @echo "✓ Installed to /Applications/$(APP_BUNDLE)" + +run: build + @./$(BUNDLE_DIR)/MacOS/$(APP_NAME) & + @echo "✓ Running $(APP_NAME)" + +dmg: build + @rm -f $(APP_NAME)-$(VERSION).dmg + @codesign --force --deep --sign - $(APP_BUNDLE) + @hdiutil create -volname "$(APP_NAME)" -srcfolder $(APP_BUNDLE) -ov -format UDZO $(APP_NAME)-$(VERSION).dmg + @echo "✓ Created $(APP_NAME)-$(VERSION).dmg" diff --git a/macos/WhisperDictate.app/Contents/Info.plist b/macos/WhisperDictate.app/Contents/Info.plist new file mode 100644 index 0000000..623cb8a --- /dev/null +++ b/macos/WhisperDictate.app/Contents/Info.plist @@ -0,0 +1,36 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleExecutable + WhisperDictate + CFBundleIconFile + AppIcon + CFBundleIdentifier + com.whisperdictate.app + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + WhisperDictate + CFBundleDisplayName + WhisperDictate + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0.0 + CFBundleVersion + 1 + LSMinimumSystemVersion + 13.0 + LSUIElement + + NSHighResolutionCapable + + NSMicrophoneUsageDescription + WhisperDictate needs microphone access to record your voice for transcription. + NSAppleEventsUsageDescription + WhisperDictate needs accessibility access to paste transcribed text. + + diff --git a/macos/src/main.swift b/macos/src/main.swift new file mode 100644 index 0000000..ff1a93d --- /dev/null +++ b/macos/src/main.swift @@ -0,0 +1,412 @@ +import Cocoa +import AVFoundation +import Carbon.HIToolbox +import ServiceManagement + +// MARK: - User Defaults Keys +struct Defaults { + static let language = "whisperLanguage" + static let modelPath = "whisperModelPath" + static let playSounds = "playSounds" +} + +// MARK: - App Delegate +class AppDelegate: NSObject, NSApplicationDelegate { + var statusItem: NSStatusItem! + var audioRecorder: AVAudioRecorder? + var isRecording = false + var settingsWindow: NSWindow? + + let audioFilePath = "/tmp/whisper-dictate.wav" + + var language: String { + get { UserDefaults.standard.string(forKey: Defaults.language) ?? "hu" } + set { UserDefaults.standard.set(newValue, forKey: Defaults.language) } + } + + var modelPath: String { + get { UserDefaults.standard.string(forKey: Defaults.modelPath) ?? NSHomeDirectory() + "/.whisper-models/ggml-medium.bin" } + set { UserDefaults.standard.set(newValue, forKey: Defaults.modelPath) } + } + + var playSounds: Bool { + get { UserDefaults.standard.object(forKey: Defaults.playSounds) as? Bool ?? true } + set { UserDefaults.standard.set(newValue, forKey: Defaults.playSounds) } + } + + func applicationDidFinishLaunching(_ notification: Notification) { + setupStatusItem() + registerHotkey() + requestMicrophonePermission() + checkModelExists() + + NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.") + } + + // MARK: - Status Item + func setupStatusItem() { + statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength) + statusItem.button?.title = "🎤" + + let menu = NSMenu() + + menu.addItem(NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: "")) + menu.addItem(NSMenuItem.separator()) + + let statusMenuItem = NSMenuItem(title: "Status: Ready", action: nil, keyEquivalent: "") + statusMenuItem.tag = 100 + menu.addItem(statusMenuItem) + + menu.addItem(NSMenuItem.separator()) + menu.addItem(NSMenuItem(title: "Settings...", action: #selector(showSettings), keyEquivalent: ",")) + menu.addItem(NSMenuItem.separator()) + menu.addItem(NSMenuItem(title: "Quit WhisperDictate", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q")) + + statusItem.menu = menu + } + + // MARK: - Settings Window + @objc func showSettings() { + if settingsWindow == nil { + settingsWindow = createSettingsWindow() + } + settingsWindow?.makeKeyAndOrderFront(nil) + NSApp.activate(ignoringOtherApps: true) + } + + func createSettingsWindow() -> NSWindow { + let window = NSWindow( + contentRect: NSRect(x: 0, y: 0, width: 450, height: 280), + styleMask: [.titled, .closable], + backing: .buffered, + defer: false + ) + window.title = "WhisperDictate Settings" + window.center() + + let contentView = NSView(frame: window.contentView!.bounds) + + var y: CGFloat = 230 + let labelWidth: CGFloat = 120 + let controlX: CGFloat = 140 + let controlWidth: CGFloat = 280 + + // Language + let langLabel = NSTextField(labelWithString: "Language:") + langLabel.frame = NSRect(x: 20, y: y, width: labelWidth, height: 24) + contentView.addSubview(langLabel) + + let langField = NSTextField(string: language) + langField.frame = NSRect(x: controlX, y: y, width: 60, height: 24) + langField.tag = 1 + langField.target = self + langField.action = #selector(languageChanged(_:)) + contentView.addSubview(langField) + + let langHint = NSTextField(labelWithString: "(hu, en, de, fr, es...)") + langHint.frame = NSRect(x: 210, y: y, width: 150, height: 24) + langHint.textColor = .secondaryLabelColor + langHint.font = NSFont.systemFont(ofSize: 11) + contentView.addSubview(langHint) + + y -= 40 + + // Model Path + let modelLabel = NSTextField(labelWithString: "Model Path:") + modelLabel.frame = NSRect(x: 20, y: y, width: labelWidth, height: 24) + contentView.addSubview(modelLabel) + + let modelField = NSTextField(string: modelPath) + modelField.frame = NSRect(x: controlX, y: y, width: controlWidth - 40, height: 24) + modelField.tag = 2 + modelField.target = self + modelField.action = #selector(modelPathChanged(_:)) + contentView.addSubview(modelField) + + let browseBtn = NSButton(title: "...", target: self, action: #selector(browseModel)) + browseBtn.frame = NSRect(x: controlX + controlWidth - 35, y: y, width: 35, height: 24) + contentView.addSubview(browseBtn) + + y -= 40 + + // Hotkey (display only) + let hotkeyLabel = NSTextField(labelWithString: "Hotkey:") + hotkeyLabel.frame = NSRect(x: 20, y: y, width: labelWidth, height: 24) + contentView.addSubview(hotkeyLabel) + + let hotkeyDisplay = NSTextField(labelWithString: "⌃⌥D (Control + Option + D)") + hotkeyDisplay.frame = NSRect(x: controlX, y: y, width: controlWidth, height: 24) + contentView.addSubview(hotkeyDisplay) + + y -= 40 + + // Play sounds + let soundCheck = NSButton(checkboxWithTitle: "Play sound feedback", target: self, action: #selector(playSoundsChanged(_:))) + soundCheck.frame = NSRect(x: controlX, y: y, width: controlWidth, height: 24) + soundCheck.state = playSounds ? .on : .off + contentView.addSubview(soundCheck) + + y -= 40 + + // Launch at login + let loginCheck = NSButton(checkboxWithTitle: "Launch at login", target: self, action: #selector(launchAtLoginChanged(_:))) + loginCheck.frame = NSRect(x: controlX, y: y, width: controlWidth, height: 24) + loginCheck.state = isLaunchAtLoginEnabled() ? .on : .off + contentView.addSubview(loginCheck) + + // Model download hint + let hintLabel = NSTextField(wrappingLabelWithString: "Model not found? Run: curl -L -o ~/.whisper-models/ggml-medium.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin") + hintLabel.frame = NSRect(x: 20, y: 15, width: 410, height: 40) + hintLabel.font = NSFont.systemFont(ofSize: 10) + hintLabel.textColor = .secondaryLabelColor + contentView.addSubview(hintLabel) + + window.contentView = contentView + return window + } + + @objc func languageChanged(_ sender: NSTextField) { + language = sender.stringValue + NSLog("Language changed to: \(language)") + } + + @objc func modelPathChanged(_ sender: NSTextField) { + modelPath = sender.stringValue + checkModelExists() + } + + @objc func browseModel() { + let panel = NSOpenPanel() + panel.allowsMultipleSelection = false + panel.canChooseDirectories = false + panel.message = "Select Whisper model file (.bin)" + + if panel.runModal() == .OK, let url = panel.url { + modelPath = url.path + if let contentView = settingsWindow?.contentView { + for subview in contentView.subviews { + if let textField = subview as? NSTextField, textField.tag == 2 { + textField.stringValue = modelPath + } + } + } + checkModelExists() + } + } + + @objc func playSoundsChanged(_ sender: NSButton) { + playSounds = sender.state == .on + } + + @objc func launchAtLoginChanged(_ sender: NSButton) { + setLaunchAtLogin(sender.state == .on) + } + + // MARK: - Launch at Login + func isLaunchAtLoginEnabled() -> Bool { + if #available(macOS 13.0, *) { + return SMAppService.mainApp.status == .enabled + } + return false + } + + func setLaunchAtLogin(_ enabled: Bool) { + if #available(macOS 13.0, *) { + do { + if enabled { + try SMAppService.mainApp.register() + } else { + try SMAppService.mainApp.unregister() + } + } catch { + NSLog("Failed to set launch at login: \(error)") + } + } + } + + // MARK: - Model Check + func checkModelExists() { + if !FileManager.default.fileExists(atPath: modelPath) { + updateStatus("⚠️ Model not found") + } else { + updateStatus("Ready") + } + } + + func updateStatus(_ status: String) { + if let menu = statusItem.menu { + for item in menu.items { + if item.tag == 100 { + item.title = "Status: \(status)" + } + } + } + } + + // MARK: - Permissions + func requestMicrophonePermission() { + AVCaptureDevice.requestAccess(for: .audio) { granted in + if !granted { + DispatchQueue.main.async { + let alert = NSAlert() + alert.messageText = "Microphone Access Required" + alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone" + alert.alertStyle = .warning + alert.runModal() + } + } + } + } + + // MARK: - Hotkey Registration + func registerHotkey() { + var hotKeyRef: EventHotKeyRef? + var gMyHotKeyID = EventHotKeyID() + gMyHotKeyID.signature = OSType(0x57485044) // "WHPD" + gMyHotKeyID.id = 1 + + let modifiers: UInt32 = UInt32(controlKey | optionKey) + let keyCode: UInt32 = 2 // D key + + RegisterEventHotKey(keyCode, modifiers, gMyHotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef) + + var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed)) + InstallEventHandler(GetApplicationEventTarget(), { (_, event, _) -> OSStatus in + let appDelegate = NSApplication.shared.delegate as! AppDelegate + DispatchQueue.main.async { + appDelegate.toggleRecording() + } + return noErr + }, 1, &eventType, nil, nil) + } + + // MARK: - Recording + @objc func toggleRecording() { + if isRecording { + stopRecordingAndTranscribe() + } else { + startRecording() + } + } + + func startRecording() { + let audioURL = URL(fileURLWithPath: audioFilePath) + try? FileManager.default.removeItem(at: audioURL) + + let settings: [String: Any] = [ + AVFormatIDKey: Int(kAudioFormatLinearPCM), + AVSampleRateKey: 16000, + AVNumberOfChannelsKey: 1, + AVLinearPCMBitDepthKey: 16, + AVLinearPCMIsFloatKey: false, + AVLinearPCMIsBigEndianKey: false + ] + + do { + audioRecorder = try AVAudioRecorder(url: audioURL, settings: settings) + audioRecorder?.record() + isRecording = true + statusItem.button?.title = "🔴" + updateStatus("Recording...") + if playSounds { NSSound(named: "Tink")?.play() } + NSLog("Recording started") + } catch { + NSLog("Recording failed: \(error)") + if playSounds { NSSound(named: "Basso")?.play() } + } + } + + func stopRecordingAndTranscribe() { + audioRecorder?.stop() + isRecording = false + statusItem.button?.title = "⏳" + updateStatus("Transcribing...") + if playSounds { NSSound(named: "Pop")?.play() } + NSLog("Recording stopped, transcribing...") + + DispatchQueue.global(qos: .userInitiated).async { + self.transcribe() + } + } + + // MARK: - Transcription + func transcribe() { + let task = Process() + task.executableURL = URL(fileURLWithPath: "/opt/homebrew/bin/whisper-cli") + task.arguments = ["-m", modelPath, "-l", language, "-f", audioFilePath] + + let pipe = Pipe() + task.standardOutput = pipe + task.standardError = FileHandle.nullDevice + + do { + try task.run() + task.waitUntilExit() + + let data = pipe.fileHandleForReading.readDataToEndOfFile() + let output = String(data: data, encoding: .utf8) ?? "" + + let lines = output.components(separatedBy: "\n") + var result = "" + for line in lines { + if line.hasPrefix("[") { + if let range = line.range(of: "]") { + let text = String(line[range.upperBound...]).trimmingCharacters(in: .whitespaces) + result += text + " " + } + } + } + result = result.trimmingCharacters(in: .whitespaces) + + DispatchQueue.main.async { + if !result.isEmpty { + self.pasteText(result) + } else { + self.statusItem.button?.title = "🎤" + self.updateStatus("Ready") + if self.playSounds { NSSound(named: "Basso")?.play() } + NSLog("No speech recognized") + } + } + } catch { + DispatchQueue.main.async { + self.statusItem.button?.title = "🎤" + self.updateStatus("Error") + if self.playSounds { NSSound(named: "Basso")?.play() } + NSLog("Transcription failed: \(error)") + } + } + } + + // MARK: - Paste + func pasteText(_ text: String) { + let pasteboard = NSPasteboard.general + pasteboard.clearContents() + pasteboard.setString(text, forType: .string) + + NSLog("Transcribed: \(text)") + + DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) { + let source = CGEventSource(stateID: .hidSystemState) + + let keyDown = CGEvent(keyboardEventSource: source, virtualKey: 0x09, keyDown: true) + keyDown?.flags = .maskCommand + keyDown?.post(tap: .cghidEventTap) + + let keyUp = CGEvent(keyboardEventSource: source, virtualKey: 0x09, keyDown: false) + keyUp?.flags = .maskCommand + keyUp?.post(tap: .cghidEventTap) + + self.statusItem.button?.title = "🎤" + self.updateStatus("Ready") + if self.playSounds { NSSound(named: "Glass")?.play() } + } + } +} + +// MARK: - Main +let app = NSApplication.shared +let delegate = AppDelegate() +app.delegate = delegate +app.setActivationPolicy(.accessory) +app.run() diff --git a/main.swift b/main.swift deleted file mode 100644 index 744966b..0000000 --- a/main.swift +++ /dev/null @@ -1,193 +0,0 @@ -import Cocoa -import AVFoundation -import Carbon.HIToolbox - -class AppDelegate: NSObject, NSApplicationDelegate { - var statusItem: NSStatusItem! - var audioRecorder: AVAudioRecorder? - var isRecording = false - let audioFilePath = "/tmp/whisper-dictate.wav" - let whisperModel = NSHomeDirectory() + "/.whisper-models/ggml-medium.bin" - - func applicationDidFinishLaunching(_ notification: Notification) { - // Create menu bar item - statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength) - statusItem.button?.title = "🎤" - - let menu = NSMenu() - menu.addItem(NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: "")) - menu.addItem(NSMenuItem.separator()) - menu.addItem(NSMenuItem(title: "Quit", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q")) - statusItem.menu = menu - - // Register global hotkey (Control + Option + D) - registerHotkey() - - // Request microphone permission - AVCaptureDevice.requestAccess(for: .audio) { granted in - if !granted { - DispatchQueue.main.async { - let alert = NSAlert() - alert.messageText = "Microphone access required" - alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone" - alert.runModal() - } - } - } - - NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.") - } - - func registerHotkey() { - // Register Control + Option + D - var hotKeyRef: EventHotKeyRef? - var gMyHotKeyID = EventHotKeyID() - gMyHotKeyID.signature = OSType(0x57485044) // "WHPD" - gMyHotKeyID.id = 1 - - // D = 2, Control = 0x1000, Option = 0x0800 - let modifiers: UInt32 = UInt32(controlKey | optionKey) - let keyCode: UInt32 = 2 // D key - - RegisterEventHotKey(keyCode, modifiers, gMyHotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef) - - // Install event handler - var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed)) - InstallEventHandler(GetApplicationEventTarget(), { (_, event, _) -> OSStatus in - let appDelegate = NSApplication.shared.delegate as! AppDelegate - DispatchQueue.main.async { - appDelegate.toggleRecording() - } - return noErr - }, 1, &eventType, nil, nil) - } - - @objc func toggleRecording() { - if isRecording { - stopRecordingAndTranscribe() - } else { - startRecording() - } - } - - func startRecording() { - let audioURL = URL(fileURLWithPath: audioFilePath) - - // Remove old file - try? FileManager.default.removeItem(at: audioURL) - - let settings: [String: Any] = [ - AVFormatIDKey: Int(kAudioFormatLinearPCM), - AVSampleRateKey: 16000, - AVNumberOfChannelsKey: 1, - AVLinearPCMBitDepthKey: 16, - AVLinearPCMIsFloatKey: false, - AVLinearPCMIsBigEndianKey: false - ] - - do { - audioRecorder = try AVAudioRecorder(url: audioURL, settings: settings) - audioRecorder?.record() - isRecording = true - statusItem.button?.title = "🔴" - NSSound(named: "Tink")?.play() - NSLog("Recording started") - } catch { - NSLog("Recording failed: \(error)") - NSSound(named: "Basso")?.play() - } - } - - func stopRecordingAndTranscribe() { - audioRecorder?.stop() - isRecording = false - statusItem.button?.title = "⏳" - NSSound(named: "Pop")?.play() - NSLog("Recording stopped, transcribing...") - - DispatchQueue.global(qos: .userInitiated).async { - self.transcribe() - } - } - - func transcribe() { - let task = Process() - task.executableURL = URL(fileURLWithPath: "/opt/homebrew/bin/whisper-cli") - task.arguments = ["-m", whisperModel, "-l", "hu", "-f", audioFilePath] - - let pipe = Pipe() - task.standardOutput = pipe - task.standardError = FileHandle.nullDevice - - do { - try task.run() - task.waitUntilExit() - - let data = pipe.fileHandleForReading.readDataToEndOfFile() - let output = String(data: data, encoding: .utf8) ?? "" - - // Parse output - extract text from lines like "[00:00:00.000 --> 00:00:03.000] Hello world" - let lines = output.components(separatedBy: "\n") - var result = "" - for line in lines { - if line.hasPrefix("[") { - if let range = line.range(of: "]") { - let text = String(line[range.upperBound...]).trimmingCharacters(in: .whitespaces) - result += text + " " - } - } - } - result = result.trimmingCharacters(in: .whitespaces) - - DispatchQueue.main.async { - if !result.isEmpty { - self.pasteText(result) - } else { - self.statusItem.button?.title = "🎤" - NSSound(named: "Basso")?.play() - NSLog("No speech recognized") - } - } - } catch { - DispatchQueue.main.async { - self.statusItem.button?.title = "🎤" - NSSound(named: "Basso")?.play() - NSLog("Transcription failed: \(error)") - } - } - } - - func pasteText(_ text: String) { - // Copy to clipboard - let pasteboard = NSPasteboard.general - pasteboard.clearContents() - pasteboard.setString(text, forType: .string) - - NSLog("Transcribed: \(text)") - - // Simulate Cmd+V - DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) { - let source = CGEventSource(stateID: .hidSystemState) - - // Key down - let keyDown = CGEvent(keyboardEventSource: source, virtualKey: 0x09, keyDown: true) // V key - keyDown?.flags = .maskCommand - keyDown?.post(tap: .cghidEventTap) - - // Key up - let keyUp = CGEvent(keyboardEventSource: source, virtualKey: 0x09, keyDown: false) - keyUp?.flags = .maskCommand - keyUp?.post(tap: .cghidEventTap) - - self.statusItem.button?.title = "🎤" - NSSound(named: "Glass")?.play() - } - } -} - -// Main -let app = NSApplication.shared -let delegate = AppDelegate() -app.delegate = delegate -app.setActivationPolicy(.accessory) // Menu bar only, no dock icon -app.run()