Restructure for multi-platform support and add Settings UI
- Move macOS code to macos/ directory for platform separation
- Add Settings window with configurable language, model path, and sound toggle
- Add launch at login support using SMAppService
- Add proper .app bundle structure with Info.plist
- Add Makefile for build, install, run, and dmg targets
- Store preferences in UserDefaults for persistence

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
51
macos/Makefile
Normal file
51
macos/Makefile
Normal file
@@ -0,0 +1,51 @@
|
||||
# WhisperDictate macOS Build
#
# Targets:
#   build (default)  compile src/main.swift into WhisperDictate.app
#   clean            remove the build stamp directory and the compiled binary
#   install          copy the bundle to /Applications and ad-hoc sign it
#   run              launch the freshly built binary in the background
#   dmg              ad-hoc sign the bundle and wrap it in a compressed disk image

APP_NAME = WhisperDictate
APP_BUNDLE = $(APP_NAME).app
VERSION = 1.0.0

# Directories
SRC_DIR = src
BUILD_DIR = build
BUNDLE_DIR = $(APP_BUNDLE)/Contents

# Compiler settings
SWIFT = swiftc
SWIFT_FLAGS = -O -framework Cocoa -framework AVFoundation -framework Carbon -framework CoreGraphics -framework ServiceManagement

.PHONY: all clean build install run dmg

all: build

build: $(BUILD_DIR)/$(APP_NAME)
	@echo "✓ Build complete"

# $(BUILD_DIR)/$(APP_NAME) is only a timestamp file; the real binary is written
# straight into the bundle. Info.plist already lives at $(BUNDLE_DIR)/Info.plist,
# so it is not copied (the source and destination would be the same file);
# a missing plist is reported instead of being silently ignored.
$(BUILD_DIR)/$(APP_NAME): $(SRC_DIR)/main.swift
	@mkdir -p $(BUILD_DIR) $(BUNDLE_DIR)/MacOS $(BUNDLE_DIR)/Resources
	$(SWIFT) $(SWIFT_FLAGS) -o $(BUNDLE_DIR)/MacOS/$(APP_NAME) $(SRC_DIR)/main.swift
	@test -f $(BUNDLE_DIR)/Info.plist || echo "⚠ $(BUNDLE_DIR)/Info.plist is missing"
	@touch $@
	@echo "✓ Built $(APP_BUNDLE)"

clean:
	rm -rf $(BUILD_DIR)
	rm -f $(BUNDLE_DIR)/MacOS/$(APP_NAME)
	@echo "✓ Cleaned"

install: build
	@rm -rf /Applications/$(APP_BUNDLE)
	@cp -R $(APP_BUNDLE) /Applications/
	@codesign --force --deep --sign - /Applications/$(APP_BUNDLE)
	@echo "✓ Installed to /Applications/$(APP_BUNDLE)"

run: build
	@./$(BUNDLE_DIR)/MacOS/$(APP_NAME) &
	@echo "✓ Running $(APP_NAME)"

dmg: build
	@rm -f $(APP_NAME)-$(VERSION).dmg
	@codesign --force --deep --sign - $(APP_BUNDLE)
	@hdiutil create -volname "$(APP_NAME)" -srcfolder $(APP_BUNDLE) -ov -format UDZO $(APP_NAME)-$(VERSION).dmg
	@echo "✓ Created $(APP_NAME)-$(VERSION).dmg"
|
||||
36
macos/WhisperDictate.app/Contents/Info.plist
Normal file
36
macos/WhisperDictate.app/Contents/Info.plist
Normal file
@@ -0,0 +1,36 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<!-- Bundle identity -->
	<key>CFBundleIdentifier</key>
	<string>com.whisperdictate.app</string>
	<key>CFBundleName</key>
	<string>WhisperDictate</string>
	<key>CFBundleDisplayName</key>
	<string>WhisperDictate</string>
	<key>CFBundleExecutable</key>
	<string>WhisperDictate</string>
	<key>CFBundleIconFile</key>
	<string>AppIcon</string>
	<key>CFBundlePackageType</key>
	<string>APPL</string>
	<key>CFBundleDevelopmentRegion</key>
	<string>en</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>

	<!-- Versioning -->
	<key>CFBundleShortVersionString</key>
	<string>1.0.0</string>
	<key>CFBundleVersion</key>
	<string>1</string>
	<key>LSMinimumSystemVersion</key>
	<string>13.0</string>

	<!-- Runtime behaviour -->
	<key>LSUIElement</key>
	<true/>
	<key>NSHighResolutionCapable</key>
	<true/>

	<!-- Privacy prompt strings -->
	<key>NSMicrophoneUsageDescription</key>
	<string>WhisperDictate needs microphone access to record your voice for transcription.</string>
	<key>NSAppleEventsUsageDescription</key>
	<string>WhisperDictate needs accessibility access to paste transcribed text.</string>
</dict>
</plist>
|
||||
412
macos/src/main.swift
Normal file
412
macos/src/main.swift
Normal file
@@ -0,0 +1,412 @@
|
||||
import Cocoa
|
||||
import AVFoundation
|
||||
import Carbon.HIToolbox
|
||||
import ServiceManagement
|
||||
|
||||
// MARK: - User Defaults Keys
|
||||
/// Namespace for the `UserDefaults` keys under which preferences are stored.
struct Defaults {
    /// Key for the language code passed to the transcriber.
    static let language: String = "whisperLanguage"
    /// Key for the filesystem path of the Whisper model file.
    static let modelPath: String = "whisperModelPath"
    /// Key for the "play sound feedback" toggle.
    static let playSounds: String = "playSounds"
}
|
||||
|
||||
// MARK: - App Delegate
|
||||
/// Menu-bar application delegate: owns the status item, the settings window,
/// the global hotkey, audio capture, and the hand-off to `whisper-cli`.
///
/// One dictation cycle is: hotkey → record WAV → hotkey → run `whisper-cli`
/// on a background queue → put the transcript on the pasteboard → post ⌘V.
class AppDelegate: NSObject, NSApplicationDelegate {
    /// Tags used to find controls / menu items again after they were built.
    private enum Tag {
        static let languageField = 1
        static let modelPathField = 2
        static let statusMenuItem = 100
    }

    /// Locations probed for the `whisper-cli` binary, in priority order.
    /// The first entry is also the fallback when none of them is executable.
    private static let whisperCLICandidates = [
        "/opt/homebrew/bin/whisper-cli",
        "/usr/local/bin/whisper-cli"
    ]

    var statusItem: NSStatusItem!
    var audioRecorder: AVAudioRecorder?
    var isRecording = false
    var settingsWindow: NSWindow?

    /// True from the moment recording stops until the transcript has been
    /// pasted (or transcription failed). Only touched on the main thread.
    private var isTranscribing = false

    /// Handle returned by `RegisterEventHotKey`, retained for the app's lifetime.
    private var hotKeyRef: EventHotKeyRef?

    let audioFilePath = "/tmp/whisper-dictate.wav"

    /// Language code passed to `whisper-cli -l`; defaults to `"hu"`.
    var language: String {
        get { UserDefaults.standard.string(forKey: Defaults.language) ?? "hu" }
        set { UserDefaults.standard.set(newValue, forKey: Defaults.language) }
    }

    /// Path of the model file passed to `whisper-cli -m`.
    var modelPath: String {
        get { UserDefaults.standard.string(forKey: Defaults.modelPath) ?? NSHomeDirectory() + "/.whisper-models/ggml-medium.bin" }
        set { UserDefaults.standard.set(newValue, forKey: Defaults.modelPath) }
    }

    /// Whether feedback sounds are played; defaults to `true` when unset.
    var playSounds: Bool {
        get { UserDefaults.standard.object(forKey: Defaults.playSounds) as? Bool ?? true }
        set { UserDefaults.standard.set(newValue, forKey: Defaults.playSounds) }
    }

    func applicationDidFinishLaunching(_ notification: Notification) {
        // The status item must exist before checkModelExists() updates its menu.
        setupStatusItem()
        registerHotkey()
        requestMicrophonePermission()
        checkModelExists()

        NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.")
    }

    // MARK: - Status Item

    /// Creates the menu-bar item and its menu.
    func setupStatusItem() {
        statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength)
        statusItem.button?.title = "🎤"

        let menu = NSMenu()
        menu.addItem(NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: ""))
        menu.addItem(NSMenuItem.separator())

        // Informational row; updateStatus(_:) finds it again through its tag.
        let statusMenuItem = NSMenuItem(title: "Status: Ready", action: nil, keyEquivalent: "")
        statusMenuItem.tag = Tag.statusMenuItem
        menu.addItem(statusMenuItem)

        menu.addItem(NSMenuItem.separator())
        menu.addItem(NSMenuItem(title: "Settings...", action: #selector(showSettings), keyEquivalent: ","))
        menu.addItem(NSMenuItem.separator())
        menu.addItem(NSMenuItem(title: "Quit WhisperDictate", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q"))

        statusItem.menu = menu
    }

    // MARK: - Settings Window

    /// Shows the settings window, building it on first use.
    @objc func showSettings() {
        if settingsWindow == nil {
            settingsWindow = createSettingsWindow()
        }
        settingsWindow?.makeKeyAndOrderFront(nil)
        NSApp.activate(ignoringOtherApps: true)
    }

    /// Builds the fixed-layout settings window.
    /// - Returns: A window that stays valid after the user closes it, so it
    ///   can be shown again by `showSettings()`.
    func createSettingsWindow() -> NSWindow {
        let contentRect = NSRect(x: 0, y: 0, width: 450, height: 280)
        let window = NSWindow(
            contentRect: contentRect,
            styleMask: [.titled, .closable],
            backing: .buffered,
            defer: false
        )
        window.title = "WhisperDictate Settings"
        // The window is cached in `settingsWindow` and re-shown after being
        // closed, so AppKit must not release it on close.
        window.isReleasedWhenClosed = false
        window.center()

        let contentView = NSView(frame: window.contentView?.bounds ?? contentRect)

        let rowHeight: CGFloat = 24
        let rowSpacing: CGFloat = 40
        let labelWidth: CGFloat = 120
        let controlX: CGFloat = 140
        let controlWidth: CGFloat = 280
        var y: CGFloat = 230

        // Adds a left-column caption on the current row.
        func addCaption(_ text: String) {
            let caption = NSTextField(labelWithString: text)
            caption.frame = NSRect(x: 20, y: y, width: labelWidth, height: rowHeight)
            contentView.addSubview(caption)
        }

        // Language
        addCaption("Language:")

        let langField = NSTextField(string: language)
        langField.frame = NSRect(x: controlX, y: y, width: 60, height: rowHeight)
        langField.tag = Tag.languageField
        langField.target = self
        langField.action = #selector(languageChanged(_:))
        contentView.addSubview(langField)

        let langHint = NSTextField(labelWithString: "(hu, en, de, fr, es...)")
        langHint.frame = NSRect(x: 210, y: y, width: 150, height: rowHeight)
        langHint.textColor = .secondaryLabelColor
        langHint.font = NSFont.systemFont(ofSize: 11)
        contentView.addSubview(langHint)

        y -= rowSpacing

        // Model Path
        addCaption("Model Path:")

        let modelField = NSTextField(string: modelPath)
        modelField.frame = NSRect(x: controlX, y: y, width: controlWidth - 40, height: rowHeight)
        modelField.tag = Tag.modelPathField
        modelField.target = self
        modelField.action = #selector(modelPathChanged(_:))
        contentView.addSubview(modelField)

        let browseBtn = NSButton(title: "...", target: self, action: #selector(browseModel))
        browseBtn.frame = NSRect(x: controlX + controlWidth - 35, y: y, width: 35, height: rowHeight)
        contentView.addSubview(browseBtn)

        y -= rowSpacing

        // Hotkey (display only)
        addCaption("Hotkey:")

        let hotkeyDisplay = NSTextField(labelWithString: "⌃⌥D (Control + Option + D)")
        hotkeyDisplay.frame = NSRect(x: controlX, y: y, width: controlWidth, height: rowHeight)
        contentView.addSubview(hotkeyDisplay)

        y -= rowSpacing

        // Play sounds
        let soundCheck = NSButton(checkboxWithTitle: "Play sound feedback", target: self, action: #selector(playSoundsChanged(_:)))
        soundCheck.frame = NSRect(x: controlX, y: y, width: controlWidth, height: rowHeight)
        soundCheck.state = playSounds ? .on : .off
        contentView.addSubview(soundCheck)

        y -= rowSpacing

        // Launch at login
        let loginCheck = NSButton(checkboxWithTitle: "Launch at login", target: self, action: #selector(launchAtLoginChanged(_:)))
        loginCheck.frame = NSRect(x: controlX, y: y, width: controlWidth, height: rowHeight)
        loginCheck.state = isLaunchAtLoginEnabled() ? .on : .off
        contentView.addSubview(loginCheck)

        // Model download hint
        let hintLabel = NSTextField(wrappingLabelWithString: "Model not found? Run: curl -L -o ~/.whisper-models/ggml-medium.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin")
        hintLabel.frame = NSRect(x: 20, y: 15, width: 410, height: 40)
        hintLabel.font = NSFont.systemFont(ofSize: 10)
        hintLabel.textColor = .secondaryLabelColor
        contentView.addSubview(hintLabel)

        window.contentView = contentView
        return window
    }

    /// Stores the edited language code. Blank input is rejected and the field
    /// is reset, so `whisper-cli` is never invoked with an empty `-l` value.
    @objc func languageChanged(_ sender: NSTextField) {
        let code = sender.stringValue.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !code.isEmpty else {
            sender.stringValue = language
            return
        }
        language = code
        sender.stringValue = code
        NSLog("Language changed to: \(language)")
    }

    /// Stores the edited model path (with a leading `~` expanded, since
    /// `FileManager` does not expand it) and refreshes the status row.
    @objc func modelPathChanged(_ sender: NSTextField) {
        let typed = sender.stringValue.trimmingCharacters(in: .whitespacesAndNewlines)
        modelPath = (typed as NSString).expandingTildeInPath
        sender.stringValue = modelPath
        checkModelExists()
    }

    /// Lets the user pick the model file with an open panel.
    @objc func browseModel() {
        let panel = NSOpenPanel()
        panel.allowsMultipleSelection = false
        panel.canChooseDirectories = false
        panel.message = "Select Whisper model file (.bin)"

        guard panel.runModal() == .OK, let url = panel.url else { return }

        modelPath = url.path
        // Mirror the new value into the text field if the window is open.
        if let field = settingsWindow?.contentView?.viewWithTag(Tag.modelPathField) as? NSTextField {
            field.stringValue = modelPath
        }
        checkModelExists()
    }

    @objc func playSoundsChanged(_ sender: NSButton) {
        playSounds = sender.state == .on
    }

    @objc func launchAtLoginChanged(_ sender: NSButton) {
        setLaunchAtLogin(sender.state == .on)
        // Registration can fail; show what the system actually reports.
        sender.state = isLaunchAtLoginEnabled() ? .on : .off
    }

    // MARK: - Launch at Login

    /// - Returns: `true` when the main app is registered as a login item.
    ///   Always `false` before macOS 13, where `SMAppService` is unavailable.
    func isLaunchAtLoginEnabled() -> Bool {
        if #available(macOS 13.0, *) {
            return SMAppService.mainApp.status == .enabled
        }
        return false
    }

    /// Registers or unregisters the app as a login item; failures are logged.
    func setLaunchAtLogin(_ enabled: Bool) {
        guard #available(macOS 13.0, *) else { return }
        do {
            if enabled {
                try SMAppService.mainApp.register()
            } else {
                try SMAppService.mainApp.unregister()
            }
        } catch {
            NSLog("Failed to set launch at login: \(error)")
        }
    }

    // MARK: - Model Check

    /// Updates the status row depending on whether `modelPath` exists.
    func checkModelExists() {
        let found = FileManager.default.fileExists(atPath: modelPath)
        updateStatus(found ? "Ready" : "⚠️ Model not found")
    }

    /// Sets the text of the "Status: …" menu row.
    func updateStatus(_ status: String) {
        statusItem.menu?.item(withTag: Tag.statusMenuItem)?.title = "Status: \(status)"
    }

    // MARK: - Permissions

    /// Asks for microphone access and shows an alert when it is denied.
    func requestMicrophonePermission() {
        AVCaptureDevice.requestAccess(for: .audio) { granted in
            guard !granted else { return }
            // The completion handler may run off the main thread; AppKit UI may not.
            DispatchQueue.main.async {
                let alert = NSAlert()
                alert.messageText = "Microphone Access Required"
                alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone"
                alert.alertStyle = .warning
                alert.runModal()
            }
        }
    }

    // MARK: - Hotkey Registration

    /// Registers ⌃⌥D as a global hotkey that calls `toggleRecording()`.
    /// Registration failures are logged; the menu item keeps working either way.
    func registerHotkey() {
        var hotKeyID = EventHotKeyID()
        hotKeyID.signature = OSType(0x57485044) // "WHPD"
        hotKeyID.id = 1

        let modifiers = UInt32(controlKey | optionKey)
        let keyCode = UInt32(kVK_ANSI_D)

        let registerStatus = RegisterEventHotKey(keyCode, modifiers, hotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef)
        guard registerStatus == noErr else {
            NSLog("Failed to register hotkey ⌃⌥D (OSStatus \(registerStatus))")
            return
        }

        var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed))
        // The handler is a C function pointer and cannot capture `self`, so the
        // delegate is looked up through NSApplication instead.
        let installStatus = InstallEventHandler(GetApplicationEventTarget(), { (_, _, _) -> OSStatus in
            guard let appDelegate = NSApplication.shared.delegate as? AppDelegate else {
                return OSStatus(eventNotHandledErr)
            }
            DispatchQueue.main.async {
                appDelegate.toggleRecording()
            }
            return noErr
        }, 1, &eventType, nil, nil)

        if installStatus != noErr {
            NSLog("Failed to install hotkey handler (OSStatus \(installStatus))")
        }
    }

    // MARK: - Recording

    /// Starts recording, or stops and transcribes if already recording.
    /// Ignored while a transcription is in flight, because starting a new
    /// recording deletes the audio file the transcriber is still reading.
    @objc func toggleRecording() {
        if isRecording {
            stopRecordingAndTranscribe()
        } else if isTranscribing {
            NSLog("Transcription in progress; toggle ignored")
            if playSounds { NSSound(named: "Basso")?.play() }
        } else {
            startRecording()
        }
    }

    /// Begins capturing 16 kHz mono 16-bit PCM audio into `audioFilePath`.
    func startRecording() {
        let audioURL = URL(fileURLWithPath: audioFilePath)
        // Best effort: the file does not exist on the first run.
        try? FileManager.default.removeItem(at: audioURL)

        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false
        ]

        let recorder: AVAudioRecorder
        do {
            recorder = try AVAudioRecorder(url: audioURL, settings: settings)
        } catch {
            NSLog("Recording failed: \(error)")
            if playSounds { NSSound(named: "Basso")?.play() }
            return
        }

        // record() reports failure through its return value, not by throwing.
        guard recorder.record() else {
            NSLog("Recording failed: recorder did not start")
            if playSounds { NSSound(named: "Basso")?.play() }
            return
        }

        audioRecorder = recorder
        isRecording = true
        statusItem.button?.title = "🔴"
        updateStatus("Recording...")
        if playSounds { NSSound(named: "Tink")?.play() }
        NSLog("Recording started")
    }

    /// Stops the recorder and runs `transcribe()` on a background queue.
    func stopRecordingAndTranscribe() {
        audioRecorder?.stop()
        isRecording = false
        isTranscribing = true
        statusItem.button?.title = "⏳"
        updateStatus("Transcribing...")
        if playSounds { NSSound(named: "Pop")?.play() }
        NSLog("Recording stopped, transcribing...")

        DispatchQueue.global(qos: .userInitiated).async {
            self.transcribe()
        }
    }

    // MARK: - Transcription

    /// Runs `whisper-cli` on the recorded file and pastes the transcript.
    /// Blocks the calling thread until the process exits, so it must not be
    /// called on the main thread. UI updates are dispatched to the main queue.
    func transcribe() {
        let task = Process()
        task.executableURL = whisperExecutableURL()
        task.arguments = ["-m", modelPath, "-l", language, "-f", audioFilePath]

        let pipe = Pipe()
        task.standardOutput = pipe
        task.standardError = FileHandle.nullDevice

        do {
            try task.run()
        } catch {
            DispatchQueue.main.async {
                NSLog("Transcription failed: \(error)")
                self.finishCycle(status: "Error", soundName: "Basso")
            }
            return
        }

        // Drain stdout BEFORE waiting: if the child fills the pipe buffer while
        // nobody reads, it blocks on write and waitUntilExit() never returns.
        let data = pipe.fileHandleForReading.readDataToEndOfFile()
        task.waitUntilExit()
        if task.terminationStatus != 0 {
            NSLog("whisper-cli exited with status \(task.terminationStatus)")
        }

        let output = String(data: data, encoding: .utf8) ?? ""
        let result = AppDelegate.extractTranscript(from: output)

        DispatchQueue.main.async {
            if result.isEmpty {
                NSLog("No speech recognized")
                self.finishCycle(status: "Ready", soundName: "Basso")
            } else {
                self.pasteText(result)
            }
        }
    }

    /// Extracts the spoken text from `whisper-cli` output.
    ///
    /// Only lines that start with `[` are considered; everything after the
    /// first `]` on such a line is kept, and the pieces are joined with spaces.
    /// - Parameter output: Raw standard output of the transcriber.
    /// - Returns: The transcript, or an empty string when no line matched.
    static func extractTranscript(from output: String) -> String {
        return output
            .components(separatedBy: "\n")
            .compactMap { line -> String? in
                guard line.hasPrefix("["), let close = line.range(of: "]") else { return nil }
                return line[close.upperBound...].trimmingCharacters(in: .whitespaces)
            }
            .joined(separator: " ")
            .trimmingCharacters(in: .whitespaces)
    }

    /// Picks the first executable `whisper-cli` among the known install
    /// locations, falling back to the first candidate so that a missing
    /// binary still surfaces as an error from `Process.run()`.
    private func whisperExecutableURL() -> URL {
        let candidates = AppDelegate.whisperCLICandidates
        let path = candidates.first { FileManager.default.isExecutableFile(atPath: $0) } ?? candidates[0]
        return URL(fileURLWithPath: path)
    }

    /// Ends a dictation cycle: re-enables toggling, restores the idle icon,
    /// sets the status row, and plays the given feedback sound if enabled.
    private func finishCycle(status: String, soundName: String) {
        isTranscribing = false
        statusItem.button?.title = "🎤"
        updateStatus(status)
        if playSounds { NSSound(named: soundName)?.play() }
    }

    // MARK: - Paste

    /// Puts `text` on the general pasteboard (replacing its contents) and
    /// posts a synthetic ⌘V so the frontmost app pastes it.
    func pasteText(_ text: String) {
        let pasteboard = NSPasteboard.general
        pasteboard.clearContents()
        pasteboard.setString(text, forType: .string)

        NSLog("Transcribed: \(text)")

        // Short delay before the keystroke is posted.
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
            let source = CGEventSource(stateID: .hidSystemState)

            // Key-down followed by key-up of "V" with the Command flag set.
            for isKeyDown in [true, false] {
                let event = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(kVK_ANSI_V), keyDown: isKeyDown)
                event?.flags = .maskCommand
                event?.post(tap: .cghidEventTap)
            }

            self.finishCycle(status: "Ready", soundName: "Glass")
        }
    }
}
|
||||
|
||||
// MARK: - Main
|
||||
// Entry point. `delegate` is a top-level constant so it stays alive for the
// whole run; NSApplication does not keep a strong reference to its delegate.
let app = NSApplication.shared
let delegate = AppDelegate()

// Accessory policy: set before the delegate is attached and the run loop starts.
app.setActivationPolicy(.accessory)
app.delegate = delegate

app.run()
|
||||
Reference in New Issue
Block a user