Restructure for multi-platform support and add Settings UI
- Move macOS code to macos/ directory for platform separation
- Add Settings window with configurable language, model path, and sound toggle
- Add launch at login support using SMAppService
- Add proper .app bundle structure with Info.plist
- Add Makefile for build, install, run, and dmg targets
- Store preferences in UserDefaults for persistence

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
51
macos/Makefile
Normal file
51
macos/Makefile
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# WhisperDictate macOS Build
#
# Compiles src/main.swift directly into the app bundle that lives next to this
# Makefile (WhisperDictate.app, which already contains Contents/Info.plist).
#
# Targets:
#   build   (default) compile the executable into the bundle
#   clean   remove build products
#   install copy the bundle to /Applications and ad-hoc sign it
#   run     start the freshly built executable in the background
#   dmg     ad-hoc sign the bundle and wrap it in a compressed disk image

APP_NAME = WhisperDictate
APP_BUNDLE = $(APP_NAME).app
VERSION = 1.0.0

# Directories
SRC_DIR = src
BUILD_DIR = build
BUNDLE_DIR = $(APP_BUNDLE)/Contents
BINARY = $(BUNDLE_DIR)/MacOS/$(APP_NAME)

# Compiler settings
SWIFT = swiftc
SWIFT_FLAGS = -O -framework Cocoa -framework AVFoundation -framework Carbon -framework CoreGraphics -framework ServiceManagement

.PHONY: all clean build install run dmg

all: build

build: $(BINARY)
	@echo "✓ Build complete"

# The real executable is the rule target (not a stamp file), so make rebuilds
# whenever the binary is missing or older than the source. Info.plist is
# already in place inside the bundle and needs no copy step.
$(BINARY): $(SRC_DIR)/main.swift
	@mkdir -p $(BUNDLE_DIR)/MacOS
	@mkdir -p $(BUNDLE_DIR)/Resources
	$(SWIFT) $(SWIFT_FLAGS) -o $(BINARY) $(SRC_DIR)/main.swift
	@echo "✓ Built $(APP_BUNDLE)"

# $(BUILD_DIR) is still removed so stamp directories left by earlier versions
# of this Makefile do not linger.
clean:
	rm -rf $(BUILD_DIR)
	rm -f $(BINARY)
	@echo "✓ Cleaned"

install: build
	@rm -rf /Applications/$(APP_BUNDLE)
	@cp -R $(APP_BUNDLE) /Applications/
	@codesign --force --deep --sign - /Applications/$(APP_BUNDLE)
	@echo "✓ Installed to /Applications/$(APP_BUNDLE)"

run: build
	@./$(BINARY) &
	@echo "✓ Running $(APP_NAME)"

dmg: build
	@rm -f $(APP_NAME)-$(VERSION).dmg
	@codesign --force --deep --sign - $(APP_BUNDLE)
	@hdiutil create -volname "$(APP_NAME)" -srcfolder $(APP_BUNDLE) -ov -format UDZO $(APP_NAME)-$(VERSION).dmg
	@echo "✓ Created $(APP_NAME)-$(VERSION).dmg"
|
||||||
36
macos/WhisperDictate.app/Contents/Info.plist
Normal file
36
macos/WhisperDictate.app/Contents/Info.plist
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||||
|
<plist version="1.0">
|
||||||
|
<dict>
|
||||||
|
<key>CFBundleDevelopmentRegion</key>
|
||||||
|
<string>en</string>
|
||||||
|
<key>CFBundleExecutable</key>
|
||||||
|
<string>WhisperDictate</string>
|
||||||
|
<key>CFBundleIconFile</key>
|
||||||
|
<string>AppIcon</string>
|
||||||
|
<key>CFBundleIdentifier</key>
|
||||||
|
<string>com.whisperdictate.app</string>
|
||||||
|
<key>CFBundleInfoDictionaryVersion</key>
|
||||||
|
<string>6.0</string>
|
||||||
|
<key>CFBundleName</key>
|
||||||
|
<string>WhisperDictate</string>
|
||||||
|
<key>CFBundleDisplayName</key>
|
||||||
|
<string>WhisperDictate</string>
|
||||||
|
<key>CFBundlePackageType</key>
|
||||||
|
<string>APPL</string>
|
||||||
|
<key>CFBundleShortVersionString</key>
|
||||||
|
<string>1.0.0</string>
|
||||||
|
<key>CFBundleVersion</key>
|
||||||
|
<string>1</string>
|
||||||
|
<key>LSMinimumSystemVersion</key>
|
||||||
|
<string>13.0</string>
|
||||||
|
<key>LSUIElement</key>
|
||||||
|
<true/>
|
||||||
|
<key>NSHighResolutionCapable</key>
|
||||||
|
<true/>
|
||||||
|
<key>NSMicrophoneUsageDescription</key>
|
||||||
|
<string>WhisperDictate needs microphone access to record your voice for transcription.</string>
|
||||||
|
<key>NSAppleEventsUsageDescription</key>
|
||||||
|
<string>WhisperDictate sends a paste command to the frontmost app to insert transcribed text.</string>
|
||||||
|
</dict>
|
||||||
|
</plist>
|
||||||
412
macos/src/main.swift
Normal file
412
macos/src/main.swift
Normal file
@@ -0,0 +1,412 @@
|
|||||||
|
import Cocoa
|
||||||
|
import AVFoundation
|
||||||
|
import Carbon.HIToolbox
|
||||||
|
import ServiceManagement
|
||||||
|
|
||||||
|
// MARK: - User Defaults Keys
|
||||||
|
/// Keys under which the app's preferences are stored in `UserDefaults`.
///
/// Declared as a caseless enum so it acts purely as a namespace and cannot be
/// instantiated by accident.
enum Defaults {
    /// Key for the language string read and written by `AppDelegate.language`.
    static let language = "whisperLanguage"
    /// Key for the model file path read and written by `AppDelegate.modelPath`.
    static let modelPath = "whisperModelPath"
    /// Key for the sound-feedback flag read and written by `AppDelegate.playSounds`.
    static let playSounds = "playSounds"
}
|
||||||
|
|
||||||
|
// MARK: - App Delegate
|
||||||
|
class AppDelegate: NSObject, NSApplicationDelegate {
|
||||||
|
var statusItem: NSStatusItem!
|
||||||
|
var audioRecorder: AVAudioRecorder?
|
||||||
|
var isRecording = false
|
||||||
|
var settingsWindow: NSWindow?
|
||||||
|
|
||||||
|
let audioFilePath = "/tmp/whisper-dictate.wav"
|
||||||
|
|
||||||
|
/// Language string passed to the transcriber via `-l`.
/// Backed by `UserDefaults`; reads fall back to "hu" when nothing is stored.
var language: String {
    get {
        let stored = UserDefaults.standard.string(forKey: Defaults.language)
        return stored ?? "hu"
    }
    set {
        UserDefaults.standard.set(newValue, forKey: Defaults.language)
    }
}
|
||||||
|
|
||||||
|
/// Filesystem path of the model file passed to the transcriber via `-m`.
/// Backed by `UserDefaults`; reads fall back to `~/.whisper-models/ggml-medium.bin`
/// (resolved against the current home directory) when nothing is stored.
var modelPath: String {
    get {
        if let stored = UserDefaults.standard.string(forKey: Defaults.modelPath) {
            return stored
        }
        return NSHomeDirectory() + "/.whisper-models/ggml-medium.bin"
    }
    set {
        UserDefaults.standard.set(newValue, forKey: Defaults.modelPath)
    }
}
|
||||||
|
|
||||||
|
/// Whether feedback sounds are played.
/// Backed by `UserDefaults`; reads default to `true` when no Bool has been stored.
var playSounds: Bool {
    get {
        guard let stored = UserDefaults.standard.object(forKey: Defaults.playSounds) as? Bool else {
            return true
        }
        return stored
    }
    set {
        UserDefaults.standard.set(newValue, forKey: Defaults.playSounds)
    }
}
|
||||||
|
|
||||||
|
/// Launch hook: builds the menu-bar item, registers the global hotkey, asks for
/// microphone access and reflects whether the model file exists in the status line.
func applicationDidFinishLaunching(_ notification: Notification) {
    // The status item must exist first: checkModelExists() updates its menu.
    setupStatusItem()

    registerHotkey()
    requestMicrophonePermission()

    checkModelExists()

    NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.")
}
|
||||||
|
|
||||||
|
// MARK: - Status Item
|
||||||
|
/// Creates the menu-bar status item and its menu:
/// toggle recording, a status line (tag 100, rewritten by `updateStatus`),
/// Settings and Quit, separated by dividers.
func setupStatusItem() {
    let item = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength)
    item.button?.title = "🎤"

    let toggleEntry = NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: "")

    // Informational line without an action; located later through its tag.
    let statusEntry = NSMenuItem(title: "Status: Ready", action: nil, keyEquivalent: "")
    statusEntry.tag = 100

    let settingsEntry = NSMenuItem(title: "Settings...", action: #selector(showSettings), keyEquivalent: ",")
    let quitEntry = NSMenuItem(title: "Quit WhisperDictate", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q")

    let entries: [NSMenuItem] = [
        toggleEntry,
        NSMenuItem.separator(),
        statusEntry,
        NSMenuItem.separator(),
        settingsEntry,
        NSMenuItem.separator(),
        quitEntry
    ]

    let menu = NSMenu()
    for entry in entries {
        menu.addItem(entry)
    }

    item.menu = menu
    statusItem = item
}
|
||||||
|
|
||||||
|
// MARK: - Settings Window
|
||||||
|
/// Shows the Settings window, creating and caching it on first use,
/// then brings the app to the front.
@objc func showSettings() {
    let window: NSWindow
    if let existing = settingsWindow {
        window = existing
    } else {
        window = createSettingsWindow()
        settingsWindow = window
    }

    window.makeKeyAndOrderFront(nil)
    NSApp.activate(ignoringOtherApps: true)
}
|
||||||
|
|
||||||
|
/// Builds the Settings window and all of its controls using fixed frames.
///
/// Rows, top to bottom: language field (tag 1) with a hint, model path field
/// (tag 2, looked up by `browseModel`) with a browse button, a read-only hotkey
/// label, the sound-feedback checkbox and the launch-at-login checkbox. A
/// model download hint sits at the bottom.
///
/// - Returns: A configured window that has not been ordered on screen yet.
func createSettingsWindow() -> NSWindow {
    let contentSize = NSSize(width: 450, height: 280)
    let window = NSWindow(
        contentRect: NSRect(origin: .zero, size: contentSize),
        styleMask: [.titled, .closable],
        backing: .buffered,
        defer: false
    )
    window.title = "WhisperDictate Settings"
    // The window is cached in `settingsWindow` and shown again after the user
    // closes it. A plain NSWindow releases itself on close by default, which
    // would leave that cached reference pointing at an over-released object.
    window.isReleasedWhenClosed = false
    window.center()

    // Same geometry the window's own content view has, without force-unwrapping it.
    let contentView = NSView(frame: NSRect(origin: .zero, size: contentSize))

    var y: CGFloat = 230          // y of the current row; rows are stacked downwards
    let rowStep: CGFloat = 40
    let labelWidth: CGFloat = 120
    let controlX: CGFloat = 140
    let controlWidth: CGFloat = 280

    // Language
    let langLabel = NSTextField(labelWithString: "Language:")
    langLabel.frame = NSRect(x: 20, y: y, width: labelWidth, height: 24)
    contentView.addSubview(langLabel)

    let langField = NSTextField(string: language)
    langField.frame = NSRect(x: controlX, y: y, width: 60, height: 24)
    langField.tag = 1
    langField.target = self
    langField.action = #selector(languageChanged(_:))
    contentView.addSubview(langField)

    let langHint = NSTextField(labelWithString: "(hu, en, de, fr, es...)")
    langHint.frame = NSRect(x: 210, y: y, width: 150, height: 24)
    langHint.textColor = .secondaryLabelColor
    langHint.font = NSFont.systemFont(ofSize: 11)
    contentView.addSubview(langHint)

    y -= rowStep

    // Model Path
    let modelLabel = NSTextField(labelWithString: "Model Path:")
    modelLabel.frame = NSRect(x: 20, y: y, width: labelWidth, height: 24)
    contentView.addSubview(modelLabel)

    let modelField = NSTextField(string: modelPath)
    modelField.frame = NSRect(x: controlX, y: y, width: controlWidth - 40, height: 24)
    modelField.tag = 2
    modelField.target = self
    modelField.action = #selector(modelPathChanged(_:))
    contentView.addSubview(modelField)

    let browseBtn = NSButton(title: "...", target: self, action: #selector(browseModel))
    browseBtn.frame = NSRect(x: controlX + controlWidth - 35, y: y, width: 35, height: 24)
    contentView.addSubview(browseBtn)

    y -= rowStep

    // Hotkey (display only)
    let hotkeyLabel = NSTextField(labelWithString: "Hotkey:")
    hotkeyLabel.frame = NSRect(x: 20, y: y, width: labelWidth, height: 24)
    contentView.addSubview(hotkeyLabel)

    let hotkeyDisplay = NSTextField(labelWithString: "⌃⌥D (Control + Option + D)")
    hotkeyDisplay.frame = NSRect(x: controlX, y: y, width: controlWidth, height: 24)
    contentView.addSubview(hotkeyDisplay)

    y -= rowStep

    // Play sounds
    let soundCheck = NSButton(checkboxWithTitle: "Play sound feedback", target: self, action: #selector(playSoundsChanged(_:)))
    soundCheck.frame = NSRect(x: controlX, y: y, width: controlWidth, height: 24)
    soundCheck.state = playSounds ? .on : .off
    contentView.addSubview(soundCheck)

    y -= rowStep

    // Launch at login
    let loginCheck = NSButton(checkboxWithTitle: "Launch at login", target: self, action: #selector(launchAtLoginChanged(_:)))
    loginCheck.frame = NSRect(x: controlX, y: y, width: controlWidth, height: 24)
    loginCheck.state = isLaunchAtLoginEnabled() ? .on : .off
    contentView.addSubview(loginCheck)

    // Model download hint
    let hintLabel = NSTextField(wrappingLabelWithString: "Model not found? Run: curl -L -o ~/.whisper-models/ggml-medium.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin")
    hintLabel.frame = NSRect(x: 20, y: 15, width: 410, height: 40)
    hintLabel.font = NSFont.systemFont(ofSize: 10)
    hintLabel.textColor = .secondaryLabelColor
    contentView.addSubview(hintLabel)

    window.contentView = contentView
    return window
}
|
||||||
|
|
||||||
|
/// Action of the language text field: stores the entered language string.
///
/// Surrounding whitespace is stripped. An empty entry is rejected and the field
/// is reset to the current setting, because the stored value is passed verbatim
/// to the transcriber as the `-l` argument.
///
/// - Parameter sender: The language text field.
@objc func languageChanged(_ sender: NSTextField) {
    let entered = sender.stringValue.trimmingCharacters(in: .whitespacesAndNewlines)

    guard !entered.isEmpty else {
        sender.stringValue = language
        NSLog("Ignoring empty language; keeping: \(language)")
        return
    }

    language = entered
    // Show the normalized value that was actually stored.
    sender.stringValue = entered
    NSLog("Language changed to: \(language)")
}
|
||||||
|
|
||||||
|
/// Action of the model path text field: stores the entered path and refreshes
/// the "model found / not found" status.
///
/// Surrounding whitespace is stripped and a leading `~` is expanded to the home
/// directory. The settings hint shows a `~/...` path, but neither
/// `FileManager.fileExists(atPath:)` nor a process argument expands a tilde.
///
/// - Parameter sender: The model path text field.
@objc func modelPathChanged(_ sender: NSTextField) {
    let entered = sender.stringValue.trimmingCharacters(in: .whitespacesAndNewlines)
    let resolved = NSString(string: entered).expandingTildeInPath

    modelPath = resolved
    // Show the path that was actually stored.
    sender.stringValue = resolved
    checkModelExists()
}
|
||||||
|
|
||||||
|
/// Action of the "..." button: lets the user pick a model file in an open panel,
/// stores the chosen path, mirrors it into the model path field (tag 2) and
/// refreshes the model status. Does nothing if the panel is cancelled.
@objc func browseModel() {
    let panel = NSOpenPanel()
    panel.message = "Select Whisper model file (.bin)"
    panel.canChooseDirectories = false
    panel.allowsMultipleSelection = false

    guard panel.runModal() == .OK, let chosen = panel.url else {
        return
    }

    modelPath = chosen.path

    // The path field is not kept in a property; find it among the settings subviews by tag.
    let textFields = settingsWindow?.contentView?.subviews.compactMap { $0 as? NSTextField } ?? []
    for field in textFields where field.tag == 2 {
        field.stringValue = modelPath
    }

    checkModelExists()
}
|
||||||
|
|
||||||
|
/// Action of the "Play sound feedback" checkbox: persists its on/off state.
///
/// - Parameter sender: The checkbox whose state changed.
@objc func playSoundsChanged(_ sender: NSButton) {
    let isChecked = (sender.state == .on)
    playSounds = isChecked
}
|
||||||
|
|
||||||
|
/// Action of the "Launch at login" checkbox: forwards its on/off state to
/// `setLaunchAtLogin(_:)`.
///
/// - Parameter sender: The checkbox whose state changed.
@objc func launchAtLoginChanged(_ sender: NSButton) {
    let wantsLaunchAtLogin = (sender.state == .on)
    setLaunchAtLogin(wantsLaunchAtLogin)
}
|
||||||
|
|
||||||
|
// MARK: - Launch at Login
|
||||||
|
/// Reports whether the main app is currently registered as a login item.
///
/// - Returns: `true` when `SMAppService.mainApp` reports `.enabled`; `false`
///   otherwise, including on systems older than macOS 13.
func isLaunchAtLoginEnabled() -> Bool {
    guard #available(macOS 13.0, *) else {
        return false
    }
    return SMAppService.mainApp.status == .enabled
}
|
||||||
|
|
||||||
|
/// Registers or unregisters the main app as a login item via `SMAppService`.
/// Failures are logged; on systems older than macOS 13 this is a no-op.
///
/// - Parameter enabled: `true` to register, `false` to unregister.
func setLaunchAtLogin(_ enabled: Bool) {
    guard #available(macOS 13.0, *) else {
        return
    }

    let service = SMAppService.mainApp
    do {
        switch enabled {
        case true:
            try service.register()
        case false:
            try service.unregister()
        }
    } catch {
        NSLog("Failed to set launch at login: \(error)")
    }
}
|
||||||
|
|
||||||
|
// MARK: - Model Check
|
||||||
|
/// Updates the status line depending on whether a file exists at `modelPath`.
func checkModelExists() {
    let modelIsPresent = FileManager.default.fileExists(atPath: modelPath)
    updateStatus(modelIsPresent ? "Ready" : "⚠️ Model not found")
}
|
||||||
|
|
||||||
|
/// Rewrites the title of the status menu line (the item tagged 100) to
/// "Status: <status>". Does nothing if the status item has no menu.
///
/// - Parameter status: Text shown after the "Status: " prefix.
func updateStatus(_ status: String) {
    guard let menu = statusItem.menu else {
        return
    }

    let title = "Status: \(status)"
    for entry in menu.items where entry.tag == 100 {
        entry.title = title
    }
}
|
||||||
|
|
||||||
|
// MARK: - Permissions
|
||||||
|
/// Requests audio capture access and, if it is denied, shows a warning alert
/// pointing the user to the Microphone privacy settings.
func requestMicrophonePermission() {
    AVCaptureDevice.requestAccess(for: .audio) { granted in
        guard !granted else {
            return
        }

        // The alert is presented from the main queue.
        DispatchQueue.main.async {
            let alert = NSAlert()
            alert.alertStyle = .warning
            alert.messageText = "Microphone Access Required"
            alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone"
            alert.runModal()
        }
    }
}
|
||||||
|
|
||||||
|
// MARK: - Hotkey Registration
|
||||||
|
/// Registers the global ⌃⌥D hotkey and installs the Carbon event handler that
/// toggles recording when it is pressed.
///
/// Failures of either Carbon call are logged instead of being silently dropped,
/// so a hotkey that never fires can be diagnosed.
func registerHotkey() {
    let hotKeyID = EventHotKeyID(signature: OSType(0x57485044), id: 1) // "WHPD"
    let modifiers = UInt32(controlKey | optionKey)
    let keyCode: UInt32 = 2 // D key

    var hotKeyRef: EventHotKeyRef?
    let registerStatus = RegisterEventHotKey(keyCode, modifiers, hotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef)
    if registerStatus != noErr {
        NSLog("Failed to register hotkey ⌃⌥D (OSStatus \(registerStatus))")
    }

    var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed))
    // The handler is a C callback and cannot capture `self`; the delegate is
    // looked up through the shared application instead.
    let installStatus = InstallEventHandler(GetApplicationEventTarget(), { (_, _, _) -> OSStatus in
        guard let appDelegate = NSApplication.shared.delegate as? AppDelegate else {
            return OSStatus(eventNotHandledErr)
        }
        DispatchQueue.main.async {
            appDelegate.toggleRecording()
        }
        return noErr
    }, 1, &eventType, nil, nil)
    if installStatus != noErr {
        NSLog("Failed to install hotkey handler (OSStatus \(installStatus))")
    }
}
|
||||||
|
|
||||||
|
// MARK: - Recording
|
||||||
|
/// Starts a recording when idle; stops the running recording and transcribes it otherwise.
@objc func toggleRecording() {
    guard isRecording else {
        startRecording()
        return
    }
    stopRecordingAndTranscribe()
}
|
||||||
|
|
||||||
|
/// Starts recording to `audioFilePath` as 16 kHz, mono, 16-bit little-endian
/// integer linear PCM, and switches the UI to the recording state.
///
/// The UI only enters the recording state when the recorder actually started.
/// A recorder that cannot be created, or whose `record()` returns `false`, is
/// logged and signalled with the error sound.
func startRecording() {
    let audioURL = URL(fileURLWithPath: audioFilePath)
    // Best-effort removal of a previous take; a missing file is not an error.
    try? FileManager.default.removeItem(at: audioURL)

    let settings: [String: Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVSampleRateKey: 16000,
        AVNumberOfChannelsKey: 1,
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsBigEndianKey: false
    ]

    do {
        let recorder = try AVAudioRecorder(url: audioURL, settings: settings)
        guard recorder.record() else {
            NSLog("Recording failed: AVAudioRecorder.record() returned false")
            if playSounds { NSSound(named: "Basso")?.play() }
            return
        }

        audioRecorder = recorder
        isRecording = true
        statusItem.button?.title = "🔴"
        updateStatus("Recording...")
        if playSounds { NSSound(named: "Tink")?.play() }
        NSLog("Recording started")
    } catch {
        NSLog("Recording failed: \(error)")
        if playSounds { NSSound(named: "Basso")?.play() }
    }
}
|
||||||
|
|
||||||
|
/// Stops the recorder, switches the UI to the transcribing state and runs
/// `transcribe()` on a background queue.
func stopRecordingAndTranscribe() {
    audioRecorder?.stop()
    isRecording = false

    statusItem.button?.title = "⏳"
    updateStatus("Transcribing...")
    if playSounds {
        NSSound(named: "Pop")?.play()
    }
    NSLog("Recording stopped, transcribing...")

    // transcribe() blocks until the external process finishes, so it runs off the main queue.
    let worker = DispatchQueue.global(qos: .userInitiated)
    worker.async {
        self.transcribe()
    }
}
|
||||||
|
|
||||||
|
// MARK: - Transcription
|
||||||
|
/// Runs `whisper-cli` on the recorded file and pastes the recognized text.
///
/// Blocks the calling thread until the process has finished; UI updates are
/// dispatched to the main queue. Output lines of the form `[...] text` are
/// reduced to their text parts and joined with spaces. An empty result, or a
/// process that cannot be launched, resets the UI and plays the error sound.
func transcribe() {
    // Homebrew installs under /opt/homebrew on Apple Silicon and /usr/local on
    // Intel. Fall back to the first candidate so a missing tool still surfaces
    // as a launch error below.
    let candidates = ["/opt/homebrew/bin/whisper-cli", "/usr/local/bin/whisper-cli"]
    let executablePath = candidates.first(where: { FileManager.default.isExecutableFile(atPath: $0) }) ?? candidates[0]

    let task = Process()
    task.executableURL = URL(fileURLWithPath: executablePath)
    task.arguments = ["-m", modelPath, "-l", language, "-f", audioFilePath]

    let pipe = Pipe()
    task.standardOutput = pipe
    task.standardError = FileHandle.nullDevice

    do {
        try task.run()

        // Drain stdout BEFORE waiting for exit: once the child fills the pipe
        // buffer it blocks on write, and waiting first would then never return.
        let data = pipe.fileHandleForReading.readDataToEndOfFile()
        task.waitUntilExit()

        if task.terminationStatus != 0 {
            NSLog("whisper-cli exited with status \(task.terminationStatus)")
        }

        let output = String(data: data, encoding: .utf8) ?? ""

        // Keep only the text after the first "]" on lines that start with "[".
        let result = output
            .components(separatedBy: "\n")
            .compactMap { line -> String? in
                guard line.hasPrefix("["), let closing = line.range(of: "]") else {
                    return nil
                }
                return String(line[closing.upperBound...]).trimmingCharacters(in: .whitespaces)
            }
            .joined(separator: " ")
            .trimmingCharacters(in: .whitespaces)

        DispatchQueue.main.async {
            if !result.isEmpty {
                self.pasteText(result)
            } else {
                self.statusItem.button?.title = "🎤"
                self.updateStatus("Ready")
                if self.playSounds { NSSound(named: "Basso")?.play() }
                NSLog("No speech recognized")
            }
        }
    } catch {
        DispatchQueue.main.async {
            self.statusItem.button?.title = "🎤"
            self.updateStatus("Error")
            if self.playSounds { NSSound(named: "Basso")?.play() }
            NSLog("Transcription failed: \(error)")
        }
    }
}
|
||||||
|
|
||||||
|
// MARK: - Paste
|
||||||
|
/// Puts `text` on the general pasteboard (replacing its contents), then after
/// a 0.1 s delay posts a synthetic ⌘V key press and resets the UI to idle.
///
/// - Parameter text: The transcribed text to paste.
func pasteText(_ text: String) {
    let board = NSPasteboard.general
    board.clearContents()
    board.setString(text, forType: .string)

    NSLog("Transcribed: \(text)")

    let pasteKey: CGKeyCode = 0x09 // V key
    DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
        let source = CGEventSource(stateID: .hidSystemState)

        // Key down followed by key up, both with the Command modifier set.
        for isKeyDown in [true, false] {
            let keyEvent = CGEvent(keyboardEventSource: source, virtualKey: pasteKey, keyDown: isKeyDown)
            keyEvent?.flags = .maskCommand
            keyEvent?.post(tap: .cghidEventTap)
        }

        self.statusItem.button?.title = "🎤"
        self.updateStatus("Ready")
        if self.playSounds {
            NSSound(named: "Glass")?.play()
        }
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MARK: - Main
|
||||||
|
// Entry point: run as an accessory app (the activation policy below) with
// AppDelegate handling launch and menu actions.
let app = NSApplication.shared

// Kept in a top-level constant so the delegate stays alive for the whole run.
let delegate = AppDelegate()

app.setActivationPolicy(.accessory)
app.delegate = delegate
app.run()
|
||||||
193
main.swift
193
main.swift
@@ -1,193 +0,0 @@
|
|||||||
import Cocoa
|
|
||||||
import AVFoundation
|
|
||||||
import Carbon.HIToolbox
|
|
||||||
|
|
||||||
/// Menu-bar dictation app: ⌃⌥D toggles an audio recording, the recording is
/// transcribed with `whisper-cli`, and the resulting text is pasted with a
/// synthetic ⌘V.
class AppDelegate: NSObject, NSApplicationDelegate {
    var statusItem: NSStatusItem!
    var audioRecorder: AVAudioRecorder?
    var isRecording = false
    /// File the recorder writes to and the transcriber reads from.
    let audioFilePath = "/tmp/whisper-dictate.wav"
    /// Model file passed to `whisper-cli -m`.
    let whisperModel = NSHomeDirectory() + "/.whisper-models/ggml-medium.bin"

    /// Launch hook: builds the menu-bar item, registers the hotkey and requests
    /// microphone access (showing an alert if it is denied).
    func applicationDidFinishLaunching(_ notification: Notification) {
        // Create menu bar item
        statusItem = NSStatusBar.system.statusItem(withLength: NSStatusItem.variableLength)
        statusItem.button?.title = "🎤"

        let menu = NSMenu()
        menu.addItem(NSMenuItem(title: "Toggle Recording (⌃⌥D)", action: #selector(toggleRecording), keyEquivalent: ""))
        menu.addItem(NSMenuItem.separator())
        menu.addItem(NSMenuItem(title: "Quit", action: #selector(NSApplication.terminate(_:)), keyEquivalent: "q"))
        statusItem.menu = menu

        // Register global hotkey (Control + Option + D)
        registerHotkey()

        // Request microphone permission
        AVCaptureDevice.requestAccess(for: .audio) { granted in
            guard !granted else { return }
            DispatchQueue.main.async {
                let alert = NSAlert()
                alert.messageText = "Microphone access required"
                alert.informativeText = "Please enable microphone access in System Settings → Privacy & Security → Microphone"
                alert.runModal()
            }
        }

        NSLog("WhisperDictate started. Press ⌃⌥D to toggle recording.")
    }

    /// Registers the global ⌃⌥D hotkey and installs the Carbon handler that
    /// toggles recording. Failures of either Carbon call are logged.
    func registerHotkey() {
        let hotKeyID = EventHotKeyID(signature: OSType(0x57485044), id: 1) // "WHPD"

        // D = 2, Control = 0x1000, Option = 0x0800
        let modifiers = UInt32(controlKey | optionKey)
        let keyCode: UInt32 = 2 // D key

        var hotKeyRef: EventHotKeyRef?
        let registerStatus = RegisterEventHotKey(keyCode, modifiers, hotKeyID, GetApplicationEventTarget(), 0, &hotKeyRef)
        if registerStatus != noErr {
            NSLog("Failed to register hotkey ⌃⌥D (OSStatus \(registerStatus))")
        }

        // Install event handler. It is a C callback and cannot capture `self`,
        // so the delegate is looked up through the shared application.
        var eventType = EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: UInt32(kEventHotKeyPressed))
        let installStatus = InstallEventHandler(GetApplicationEventTarget(), { (_, _, _) -> OSStatus in
            guard let appDelegate = NSApplication.shared.delegate as? AppDelegate else {
                return OSStatus(eventNotHandledErr)
            }
            DispatchQueue.main.async {
                appDelegate.toggleRecording()
            }
            return noErr
        }, 1, &eventType, nil, nil)
        if installStatus != noErr {
            NSLog("Failed to install hotkey handler (OSStatus \(installStatus))")
        }
    }

    /// Starts a recording when idle; stops and transcribes otherwise.
    @objc func toggleRecording() {
        if isRecording {
            stopRecordingAndTranscribe()
        } else {
            startRecording()
        }
    }

    /// Starts recording 16 kHz mono 16-bit PCM to `audioFilePath`. The UI only
    /// enters the recording state when the recorder actually started.
    func startRecording() {
        let audioURL = URL(fileURLWithPath: audioFilePath)

        // Remove old file (best effort; a missing file is not an error)
        try? FileManager.default.removeItem(at: audioURL)

        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false
        ]

        do {
            let recorder = try AVAudioRecorder(url: audioURL, settings: settings)
            guard recorder.record() else {
                NSLog("Recording failed: AVAudioRecorder.record() returned false")
                NSSound(named: "Basso")?.play()
                return
            }
            audioRecorder = recorder
            isRecording = true
            statusItem.button?.title = "🔴"
            NSSound(named: "Tink")?.play()
            NSLog("Recording started")
        } catch {
            NSLog("Recording failed: \(error)")
            NSSound(named: "Basso")?.play()
        }
    }

    /// Stops the recorder and runs `transcribe()` on a background queue.
    func stopRecordingAndTranscribe() {
        audioRecorder?.stop()
        isRecording = false
        statusItem.button?.title = "⏳"
        NSSound(named: "Pop")?.play()
        NSLog("Recording stopped, transcribing...")

        DispatchQueue.global(qos: .userInitiated).async {
            self.transcribe()
        }
    }

    /// Runs `whisper-cli` on the recorded file (language "hu") and pastes the
    /// recognized text. Blocks the calling thread until the process finishes.
    func transcribe() {
        let task = Process()
        task.executableURL = URL(fileURLWithPath: "/opt/homebrew/bin/whisper-cli")
        task.arguments = ["-m", whisperModel, "-l", "hu", "-f", audioFilePath]

        let pipe = Pipe()
        task.standardOutput = pipe
        task.standardError = FileHandle.nullDevice

        do {
            try task.run()

            // Drain stdout BEFORE waiting for exit: once the child fills the
            // pipe buffer it blocks on write, and waiting first would hang.
            let data = pipe.fileHandleForReading.readDataToEndOfFile()
            task.waitUntilExit()

            let output = String(data: data, encoding: .utf8) ?? ""

            // Parse output - extract text from lines like "[00:00:00.000 --> 00:00:03.000] Hello world"
            let lines = output.components(separatedBy: "\n")
            var result = ""
            for line in lines where line.hasPrefix("[") {
                if let range = line.range(of: "]") {
                    let text = String(line[range.upperBound...]).trimmingCharacters(in: .whitespaces)
                    result += text + " "
                }
            }
            result = result.trimmingCharacters(in: .whitespaces)

            DispatchQueue.main.async {
                if !result.isEmpty {
                    self.pasteText(result)
                } else {
                    self.statusItem.button?.title = "🎤"
                    NSSound(named: "Basso")?.play()
                    NSLog("No speech recognized")
                }
            }
        } catch {
            DispatchQueue.main.async {
                self.statusItem.button?.title = "🎤"
                NSSound(named: "Basso")?.play()
                NSLog("Transcription failed: \(error)")
            }
        }
    }

    /// Copies `text` to the general pasteboard, then after 0.1 s posts a
    /// synthetic ⌘V key press and resets the menu-bar icon.
    func pasteText(_ text: String) {
        // Copy to clipboard
        let pasteboard = NSPasteboard.general
        pasteboard.clearContents()
        pasteboard.setString(text, forType: .string)

        NSLog("Transcribed: \(text)")

        // Simulate Cmd+V
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
            let source = CGEventSource(stateID: .hidSystemState)

            // Key down
            let keyDown = CGEvent(keyboardEventSource: source, virtualKey: 0x09, keyDown: true) // V key
            keyDown?.flags = .maskCommand
            keyDown?.post(tap: .cghidEventTap)

            // Key up
            let keyUp = CGEvent(keyboardEventSource: source, virtualKey: 0x09, keyDown: false)
            keyUp?.flags = .maskCommand
            keyUp?.post(tap: .cghidEventTap)

            self.statusItem.button?.title = "🎤"
            NSSound(named: "Glass")?.play()
        }
    }
}
|
|
||||||
|
|
||||||
// Main
|
|
||||||
// Entry point: run as an accessory app with AppDelegate handling launch and menu actions.
let app = NSApplication.shared

// Kept in a top-level constant so the delegate stays alive for the whole run.
let delegate = AppDelegate()

app.setActivationPolicy(.accessory) // Menu bar only, no dock icon
app.delegate = delegate
app.run()
|
|
||||||
Reference in New Issue
Block a user