Restructure for multi-platform support and add Settings UI

- Move macOS code to macos/ directory for platform separation
- Add Settings window with configurable language, model path, and sound toggle
- Add launch at login support using SMAppService
- Add proper .app bundle structure with Info.plist
- Add Makefile for build, install, run, and dmg targets
- Store preferences in UserDefaults for persistence

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
hariel1985
2026-02-02 12:31:42 +01:00
szülő 3d9e798f02
commit 1727f4ba5b
4 fájl változott, egészen pontosan 499 új sor hozzáadva és 193 régi sor törölve

412
macos/src/main.swift Normal file
Fájl megtekintése

@@ -0,0 +1,412 @@
import Cocoa
import AVFoundation
import Carbon.HIToolbox
import ServiceManagement
// MARK: - User Defaults Keys
/// Keys under which the app's preferences are stored in `UserDefaults`.
struct Defaults {
    /// Language code passed to whisper-cli.
    static let language: String = "whisperLanguage"

    /// Filesystem path of the Whisper model file.
    static let modelPath: String = "whisperModelPath"

    /// Whether audible feedback is played.
    static let playSounds: String = "playSounds"
}
// MARK: - App Delegate
/// Application delegate for the WhisperDictate menu-bar app.
///
/// Owns the status-bar item and its menu, the settings window, the global
/// ⌃⌥D hotkey, audio capture to a temporary WAV file, and the hand-off to
/// `whisper-cli`, whose output is placed on the pasteboard and pasted with a
/// synthesized ⌘V.
class AppDelegate: NSObject, NSApplicationDelegate {
    // MARK: - Constants

    /// Tags used to find views and menu items again after they were created.
    private enum Tag {
        static let languageField = 1
        static let modelPathField = 2
        static let statusMenuItem = 100
    }

    /// Locations probed for the `whisper-cli` binary, in order of preference.
    private static let whisperCLICandidates = [
        "/opt/homebrew/bin/whisper-cli", // Homebrew prefix on Apple silicon
        "/usr/local/bin/whisper-cli"     // Homebrew prefix on Intel
    ]

    // MARK: - State

    var statusItem: NSStatusItem!
    var audioRecorder: AVAudioRecorder?
    var isRecording = false
    var settingsWindow: NSWindow?
    let audioFilePath = "/tmp/whisper-dictate.wav"

    /// True from the moment recording stops until the transcription result
    /// (or failure) has been handled. Only touched on the main thread.
    private var isTranscribing = false

    /// Handle of the registered global hotkey; kept so it is not discarded.
    private var hotKeyRef: EventHotKeyRef?

    // MARK: - Preferences (backed by UserDefaults)

    /// Language code handed to whisper-cli; defaults to `"hu"`.
    var language: String {
        get { UserDefaults.standard.string(forKey: Defaults.language) ?? "hu" }
        set { UserDefaults.standard.set(newValue, forKey: Defaults.language) }
    }

    /// Path of the model file; defaults to `~/.whisper-models/ggml-medium.bin`.
    var modelPath: String {
        get { UserDefaults.standard.string(forKey: Defaults.modelPath) ?? NSHomeDirectory() + "/.whisper-models/ggml-medium.bin" }
        set { UserDefaults.standard.set(newValue, forKey: Defaults.modelPath) }
    }

    /// Whether feedback sounds are played; defaults to `true` when unset.
    var playSounds: Bool {
        get { UserDefaults.standard.object(forKey: Defaults.playSounds) as? Bool ?? true }
        set { UserDefaults.standard.set(newValue, forKey: Defaults.playSounds) }
    }

    // MARK: - Lifecycle

    func applicationDidFinishLaunching(_ notification: Notification) {
        setupStatusItem()
        registerHotkey()
        requestMicrophonePermission()
        checkModelExists()
        NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.")
    }

    // MARK: - Status Item

    /// Creates the status-bar item and its menu.
    func setupStatusItem() {
        statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength)
        statusItem.button?.title = "🎤"

        let menu = NSMenu()

        let toggleItem = NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: "")
        toggleItem.target = self
        menu.addItem(toggleItem)
        menu.addItem(NSMenuItem.separator())

        let statusMenuItem = NSMenuItem(title: "Status: Ready", action: nil, keyEquivalent: "")
        statusMenuItem.tag = Tag.statusMenuItem
        menu.addItem(statusMenuItem)
        menu.addItem(NSMenuItem.separator())

        let settingsItem = NSMenuItem(title: "Settings...", action: #selector(showSettings), keyEquivalent: ",")
        settingsItem.target = self
        menu.addItem(settingsItem)
        menu.addItem(NSMenuItem.separator())

        // No explicit target: terminate(_:) is resolved through the responder chain.
        menu.addItem(NSMenuItem(title: "Quit WhisperDictate", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q"))

        statusItem.menu = menu
    }

    // MARK: - Settings Window

    /// Shows the settings window, creating it on first use.
    @objc func showSettings() {
        if settingsWindow == nil {
            settingsWindow = createSettingsWindow()
        }
        settingsWindow?.makeKeyAndOrderFront(nil)
        NSApp.activate(ignoringOtherApps: true)
    }

    /// Builds the settings window with language, model path, hotkey display,
    /// sound toggle, launch-at-login toggle, and a model download hint.
    func createSettingsWindow() -> NSWindow {
        let contentRect = NSRect(x: 0, y: 0, width: 450, height: 280)
        let window = NSWindow(
            contentRect: contentRect,
            styleMask: [.titled, .closable],
            backing: .buffered,
            defer: false
        )
        window.title = "WhisperDictate Settings"
        // The window is cached in `settingsWindow` and reused after the user
        // closes it, so it must not release itself on close.
        window.isReleasedWhenClosed = false
        window.center()

        let contentView = NSView(frame: window.contentView?.bounds ?? contentRect)

        let labelX: CGFloat = 20
        let labelWidth: CGFloat = 120
        let controlX: CGFloat = 140
        let controlWidth: CGFloat = 280
        let rowHeight: CGFloat = 24
        let rowSpacing: CGFloat = 40
        var y: CGFloat = 230

        // Adds the left-hand caption for the row currently at `y`.
        func addRowLabel(_ text: String) {
            let label = NSTextField(labelWithString: text)
            label.frame = NSRect(x: labelX, y: y, width: labelWidth, height: rowHeight)
            contentView.addSubview(label)
        }

        // Language
        addRowLabel("Language:")
        let langField = NSTextField(string: language)
        langField.frame = NSRect(x: controlX, y: y, width: 60, height: rowHeight)
        langField.tag = Tag.languageField
        langField.target = self
        langField.action = #selector(languageChanged(_:))
        contentView.addSubview(langField)

        let langHint = NSTextField(labelWithString: "(hu, en, de, fr, es...)")
        langHint.frame = NSRect(x: 210, y: y, width: 150, height: rowHeight)
        langHint.textColor = .secondaryLabelColor
        langHint.font = NSFont.systemFont(ofSize: 11)
        contentView.addSubview(langHint)
        y -= rowSpacing

        // Model path
        addRowLabel("Model Path:")
        let modelField = NSTextField(string: modelPath)
        modelField.frame = NSRect(x: controlX, y: y, width: controlWidth - 40, height: rowHeight)
        modelField.tag = Tag.modelPathField
        modelField.target = self
        modelField.action = #selector(modelPathChanged(_:))
        contentView.addSubview(modelField)

        let browseBtn = NSButton(title: "...", target: self, action: #selector(browseModel))
        browseBtn.frame = NSRect(x: controlX + controlWidth - 35, y: y, width: 35, height: rowHeight)
        contentView.addSubview(browseBtn)
        y -= rowSpacing

        // Hotkey (display only)
        addRowLabel("Hotkey:")
        let hotkeyDisplay = NSTextField(labelWithString: "⌃⌥D (Control + Option + D)")
        hotkeyDisplay.frame = NSRect(x: controlX, y: y, width: controlWidth, height: rowHeight)
        contentView.addSubview(hotkeyDisplay)
        y -= rowSpacing

        // Play sounds
        let soundCheck = NSButton(checkboxWithTitle: "Play sound feedback", target: self, action: #selector(playSoundsChanged(_:)))
        soundCheck.frame = NSRect(x: controlX, y: y, width: controlWidth, height: rowHeight)
        soundCheck.state = playSounds ? .on : .off
        contentView.addSubview(soundCheck)
        y -= rowSpacing

        // Launch at login
        let loginCheck = NSButton(checkboxWithTitle: "Launch at login", target: self, action: #selector(launchAtLoginChanged(_:)))
        loginCheck.frame = NSRect(x: controlX, y: y, width: controlWidth, height: rowHeight)
        loginCheck.state = isLaunchAtLoginEnabled() ? .on : .off
        // setLaunchAtLogin(_:) is a no-op before macOS 13, so don't offer the toggle there.
        loginCheck.isEnabled = isLaunchAtLoginSupported
        contentView.addSubview(loginCheck)

        // Model download hint
        let hintLabel = NSTextField(wrappingLabelWithString: "Model not found? Run: curl -L -o ~/.whisper-models/ggml-medium.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin")
        hintLabel.frame = NSRect(x: 20, y: 15, width: 410, height: 40)
        hintLabel.font = NSFont.systemFont(ofSize: 10)
        hintLabel.textColor = .secondaryLabelColor
        contentView.addSubview(hintLabel)

        window.contentView = contentView
        return window
    }

    /// Stores the language typed into the settings field.
    ///
    /// Blank input is rejected (the field is reset to the stored value) so an
    /// empty `-l` argument is never handed to whisper-cli.
    @objc func languageChanged(_ sender: NSTextField) {
        let code = sender.stringValue
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .lowercased()
        guard !code.isEmpty else {
            sender.stringValue = language
            return
        }
        language = code
        sender.stringValue = code
        NSLog("Language changed to: \(language)")
    }

    /// Stores the model path typed into the settings field and re-checks it.
    ///
    /// A leading `~` is expanded because the later `fileExists(atPath:)` check
    /// is done on the stored string as-is. Blank input is rejected.
    @objc func modelPathChanged(_ sender: NSTextField) {
        let typed = sender.stringValue.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !typed.isEmpty else {
            sender.stringValue = modelPath
            return
        }
        let expanded = (typed as NSString).expandingTildeInPath
        modelPath = expanded
        sender.stringValue = expanded
        checkModelExists()
    }

    /// Lets the user pick a model file and mirrors the choice into the path field.
    @objc func browseModel() {
        let panel = NSOpenPanel()
        panel.allowsMultipleSelection = false
        panel.canChooseDirectories = false
        panel.message = "Select Whisper model file (.bin)"

        guard panel.runModal() == .OK, let url = panel.url else { return }

        modelPath = url.path
        if let field = settingsWindow?.contentView?.viewWithTag(Tag.modelPathField) as? NSTextField {
            field.stringValue = modelPath
        }
        checkModelExists()
    }

    @objc func playSoundsChanged(_ sender: NSButton) {
        playSounds = sender.state == .on
    }

    /// Applies the launch-at-login choice, then re-reads the actual state so
    /// the checkbox does not stay checked when registration failed.
    @objc func launchAtLoginChanged(_ sender: NSButton) {
        setLaunchAtLogin(sender.state == .on)
        sender.state = isLaunchAtLoginEnabled() ? .on : .off
    }

    // MARK: - Launch at Login

    /// Whether the running system offers `SMAppService` (macOS 13+).
    private var isLaunchAtLoginSupported: Bool {
        if #available(macOS 13.0, *) {
            return true
        }
        return false
    }

    /// Returns `true` when the main app is registered as a login item.
    /// Always `false` before macOS 13.
    func isLaunchAtLoginEnabled() -> Bool {
        if #available(macOS 13.0, *) {
            return SMAppService.mainApp.status == .enabled
        }
        return false
    }

    /// Registers or unregisters the main app as a login item (macOS 13+ only).
    /// Failures are logged, not thrown.
    func setLaunchAtLogin(_ enabled: Bool) {
        guard #available(macOS 13.0, *) else { return }
        do {
            if enabled {
                try SMAppService.mainApp.register()
            } else {
                try SMAppService.mainApp.unregister()
            }
        } catch {
            NSLog("Failed to set launch at login: \(error)")
        }
    }

    // MARK: - Model Check

    /// Reflects in the status menu whether a file exists at `modelPath`.
    func checkModelExists() {
        let modelIsPresent = FileManager.default.fileExists(atPath: modelPath)
        updateStatus(modelIsPresent ? "Ready" : "⚠️ Model not found")
    }

    /// Sets the text of the "Status: …" menu item.
    func updateStatus(_ status: String) {
        statusItem.menu?.item(withTag: Tag.statusMenuItem)?.title = "Status: \(status)"
    }

    // MARK: - Permissions

    /// Asks for microphone access and shows an alert when it is denied.
    func requestMicrophonePermission() {
        AVCaptureDevice.requestAccess(for: .audio) { granted in
            guard !granted else { return }
            // The completion handler may run off the main thread; UI goes to main.
            DispatchQueue.main.async {
                let alert = NSAlert()
                alert.messageText = "Microphone Access Required"
                alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone"
                alert.alertStyle = .warning
                alert.runModal()
            }
        }
    }

    // MARK: - Hotkey Registration

    /// Registers the global ⌃⌥D hotkey and installs the handler that toggles
    /// recording. Registration failures are logged instead of being ignored.
    func registerHotkey() {
        let target = GetApplicationEventTarget()

        var hotKeyID = EventHotKeyID()
        hotKeyID.signature = OSType(0x57485044) // "WHPD"
        hotKeyID.id = 1

        let modifiers = UInt32(controlKey | optionKey)
        let keyCode = UInt32(kVK_ANSI_D)

        let registerStatus = RegisterEventHotKey(keyCode, modifiers, hotKeyID, target, 0, &hotKeyRef)
        guard registerStatus == noErr else {
            NSLog("Failed to register hotkey ⌃⌥D (OSStatus \(registerStatus))")
            return
        }

        var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed))
        // The handler is a C function pointer and cannot capture `self`,
        // so the delegate is looked up through NSApplication instead.
        let installStatus = InstallEventHandler(target, { (_, _, _) -> OSStatus in
            guard let appDelegate = NSApplication.shared.delegate as? AppDelegate else {
                return OSStatus(eventNotHandledErr)
            }
            DispatchQueue.main.async {
                appDelegate.toggleRecording()
            }
            return noErr
        }, 1, &eventType, nil, nil)

        if installStatus != noErr {
            NSLog("Failed to install hotkey handler (OSStatus \(installStatus))")
        }
    }

    // MARK: - Recording

    /// Starts recording, or stops and transcribes when already recording.
    ///
    /// Ignored while a transcription is in flight: starting a new recording
    /// would delete and overwrite the audio file whisper-cli is reading.
    @objc func toggleRecording() {
        guard !isTranscribing else {
            NSLog("Toggle ignored: transcription still in progress")
            if playSounds { NSSound(named: "Basso")?.play() }
            return
        }

        if isRecording {
            stopRecordingAndTranscribe()
        } else {
            startRecording()
        }
    }

    /// Begins capturing 16 kHz mono 16-bit PCM audio to `audioFilePath`.
    func startRecording() {
        let audioURL = URL(fileURLWithPath: audioFilePath)
        // Best effort: the file may simply not exist yet.
        try? FileManager.default.removeItem(at: audioURL)

        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false
        ]

        do {
            let recorder = try AVAudioRecorder(url: audioURL, settings: settings)
            // record() reports failure through its Bool result, not by throwing.
            guard recorder.record() else {
                NSLog("Recording failed: AVAudioRecorder.record() returned false")
                if playSounds { NSSound(named: "Basso")?.play() }
                return
            }
            audioRecorder = recorder
            isRecording = true
            statusItem.button?.title = "🔴"
            updateStatus("Recording...")
            if playSounds { NSSound(named: "Tink")?.play() }
            NSLog("Recording started")
        } catch {
            NSLog("Recording failed: \(error)")
            if playSounds { NSSound(named: "Basso")?.play() }
        }
    }

    /// Stops the recorder and runs the transcription on a background queue.
    func stopRecordingAndTranscribe() {
        audioRecorder?.stop()
        isRecording = false
        isTranscribing = true
        // A non-empty title keeps the variable-length status item visible.
        statusItem.button?.title = "⏳"
        updateStatus("Transcribing...")
        if playSounds { NSSound(named: "Pop")?.play() }
        NSLog("Recording stopped, transcribing...")

        DispatchQueue.global(qos: .userInitiated).async {
            self.transcribe()
        }
    }

    // MARK: - Transcription

    /// Runs whisper-cli on the recorded file and pastes the recognized text.
    ///
    /// Blocks the calling thread until the process exits, so it is dispatched
    /// to a background queue by `stopRecordingAndTranscribe()`. All UI updates
    /// are sent back to the main queue.
    func transcribe() {
        let task = Process()
        task.executableURL = whisperExecutableURL()
        task.arguments = ["-m", modelPath, "-l", language, "-f", audioFilePath]

        let pipe = Pipe()
        task.standardOutput = pipe
        task.standardError = FileHandle.nullDevice

        let output: String
        do {
            try task.run()
            // Drain stdout BEFORE waiting: if the child fills the pipe buffer
            // while nobody reads, it blocks and waitUntilExit() never returns.
            let data = pipe.fileHandleForReading.readDataToEndOfFile()
            task.waitUntilExit()
            output = String(data: data, encoding: .utf8) ?? ""
        } catch {
            DispatchQueue.main.async {
                NSLog("Transcription failed: \(error)")
                self.finishTranscription(status: "Error", soundName: "Basso")
            }
            return
        }

        let exitStatus = task.terminationStatus
        guard exitStatus == 0 else {
            DispatchQueue.main.async {
                NSLog("Transcription failed: whisper-cli exited with status \(exitStatus)")
                self.finishTranscription(status: "Error", soundName: "Basso")
            }
            return
        }

        let result = transcriptText(from: output)
        DispatchQueue.main.async {
            if result.isEmpty {
                NSLog("No speech recognized")
                self.finishTranscription(status: "Ready", soundName: "Basso")
            } else {
                self.pasteText(result)
            }
        }
    }

    /// Returns the first executable whisper-cli candidate, falling back to the
    /// first candidate so a missing binary still surfaces as a launch error.
    private func whisperExecutableURL() -> URL {
        let candidates = AppDelegate.whisperCLICandidates
        let path = candidates.first(where: { FileManager.default.isExecutableFile(atPath: $0) }) ?? candidates[0]
        return URL(fileURLWithPath: path)
    }

    /// Extracts the spoken text from whisper-cli output: for every line that
    /// starts with `[`, keeps what follows the first `]`, and joins the
    /// non-empty pieces with single spaces.
    private func transcriptText(from output: String) -> String {
        let segments = output
            .components(separatedBy: .newlines)
            .compactMap { line -> String? in
                guard line.hasPrefix("["), let closing = line.range(of: "]") else { return nil }
                let text = line[closing.upperBound...].trimmingCharacters(in: .whitespaces)
                return text.isEmpty ? nil : text
            }
        return segments.joined(separator: " ")
    }

    /// Restores the idle UI and re-enables the hotkey. Main thread only.
    private func finishTranscription(status: String, soundName: String) {
        isTranscribing = false
        statusItem.button?.title = "🎤"
        updateStatus(status)
        if playSounds { NSSound(named: soundName)?.play() }
    }

    // MARK: - Paste

    /// Puts `text` on the general pasteboard (replacing its contents) and
    /// posts a synthetic ⌘V shortly afterwards.
    func pasteText(_ text: String) {
        let pasteboard = NSPasteboard.general
        pasteboard.clearContents()
        pasteboard.setString(text, forType: .string)
        NSLog("Transcribed: \(text)")

        // Short delay before the keystroke, as in the original flow.
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
            let source = CGEventSource(stateID: .hidSystemState)
            let vKey = CGKeyCode(kVK_ANSI_V)

            let keyDown = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: true)
            keyDown?.flags = .maskCommand
            keyDown?.post(tap: .cghidEventTap)

            let keyUp = CGEvent(keyboardEventSource: source, virtualKey: vKey, keyDown: false)
            keyUp?.flags = .maskCommand
            keyUp?.post(tap: .cghidEventTap)

            self.finishTranscription(status: "Ready", soundName: "Glass")
        }
    }
}
// MARK: - Main
// Entry point: configure the shared application as a menu-bar-only
// (accessory) app and start the run loop.
let app = NSApplication.shared
// Held in a top-level constant for the lifetime of the process.
let delegate = AppDelegate()
app.setActivationPolicy(.accessory)
app.delegate = delegate
app.run()