diff --git a/TypeaheadAI.xcodeproj/project.pbxproj b/TypeaheadAI.xcodeproj/project.pbxproj index 00c80ca..a2c7bb6 100644 --- a/TypeaheadAI.xcodeproj/project.pbxproj +++ b/TypeaheadAI.xcodeproj/project.pbxproj @@ -58,6 +58,7 @@ 2B8CD4962B05FF59003E0589 /* ModalFooterView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8CD4952B05FF59003E0589 /* ModalFooterView.swift */; }; 2B8CD4992B06DD36003E0589 /* Functions.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8CD4982B06DD36003E0589 /* Functions.swift */; }; 2B8CD49B2B076AE6003E0589 /* ActivateOnboardingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8CD49A2B076AE6003E0589 /* ActivateOnboardingView.swift */; }; + 2B8CD49F2B084779003E0589 /* SmartClickActor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8CD49E2B084779003E0589 /* SmartClickActor.swift */; }; 2B92BDB92AA3A2DD00E65CFA /* CustomModalWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B92BDB82AA3A2DD00E65CFA /* CustomModalWindow.swift */; }; 2B92BDBB2AA3D10800E65CFA /* ModalManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B92BDBA2AA3D10800E65CFA /* ModalManager.swift */; }; 2BA3C2352AADAC5700537F95 /* llama in Frameworks */ = {isa = PBXBuildFile; productRef = 2BA3C2342AADAC5700537F95 /* llama */; }; @@ -170,6 +171,7 @@ 2B8CD4952B05FF59003E0589 /* ModalFooterView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModalFooterView.swift; sourceTree = ""; }; 2B8CD4982B06DD36003E0589 /* Functions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Functions.swift; sourceTree = ""; }; 2B8CD49A2B076AE6003E0589 /* ActivateOnboardingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ActivateOnboardingView.swift; sourceTree = ""; }; + 2B8CD49E2B084779003E0589 /* SmartClickActor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SmartClickActor.swift; sourceTree = ""; }; 2B92BDB82AA3A2DD00E65CFA /* CustomModalWindow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CustomModalWindow.swift; sourceTree = ""; }; 2B92BDBA2AA3D10800E65CFA /* ModalManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModalManager.swift; sourceTree = ""; }; 2BA3C2362AADAD9A00537F95 /* SpecialCopyActor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpecialCopyActor.swift; sourceTree = ""; }; @@ -280,6 +282,7 @@ 2B3792302AB83739008D812F /* CanSimulatePaste.swift */, 2B0B300F2ACF8C8000338B76 /* SpecialOpenActor.swift */, 2B8CD4932B05D278003E0589 /* CanPerformOCR.swift */, + 2B8CD49E2B084779003E0589 /* SmartClickActor.swift */, ); path = Actors; sourceTree = ""; @@ -714,6 +717,7 @@ 2B27450A2AB01CF400F37D3E /* SpecialSaveActor.swift in Sources */, 2B8CD4962B05FF59003E0589 /* ModalFooterView.swift in Sources */, 2B7D35842B01B14100E85AEF /* IntroOnboardingView.swift in Sources */, + 2B8CD49F2B084779003E0589 /* SmartClickActor.swift in Sources */, 2B7D358A2B01ED5100E85AEF /* SmartCopyOnboardingView.swift in Sources */, 2BAFDB6B2AF60F64009C8370 /* MenuButtonView.swift in Sources */, 2B7BBAC22AF3347100E4CE1F /* NewQuickActionForm.swift in Sources */, diff --git a/TypeaheadAI/Actors/CanPerformOCR.swift b/TypeaheadAI/Actors/CanPerformOCR.swift index c913285..9d2d986 100644 --- a/TypeaheadAI/Actors/CanPerformOCR.swift +++ b/TypeaheadAI/Actors/CanPerformOCR.swift @@ -15,11 +15,11 @@ private struct Constants { } protocol CanPerformOCR { - func performOCR(image: CGImage) async throws -> (String, NSImage?) + func performOCR(image: CGImage, level: VNRequestTextRecognitionLevel) async throws -> (String, NSImage?) } extension CanPerformOCR { - func performOCR(image: CGImage) async throws -> (String, NSImage?) { + func performOCR(image: CGImage, level: VNRequestTextRecognitionLevel = .accurate) async throws -> (String, NSImage?) { try await withCheckedThrowingContinuation { continuation in let request = VNRecognizeTextRequest { (request, error) in if let error = error { @@ -39,7 +39,7 @@ extension CanPerformOCR { continuation.resume(returning: (allRecognizedText, imageWithBoxes)) } - request.recognitionLevel = .accurate + request.recognitionLevel = level request.automaticallyDetectsLanguage = true let handler = VNImageRequestHandler(cgImage: image, options: [:]) @@ -140,7 +140,7 @@ extension CanPerformOCR { bitsPerComponent: image.bitsPerComponent, bytesPerRow: image.bytesPerRow, space: image.colorSpace ?? CGColorSpace(name: CGColorSpace.sRGB)!, - bitmapInfo: image.bitmapInfo.rawValue + bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue ) else { return nil } diff --git a/TypeaheadAI/Actors/SmartClickActor.swift b/TypeaheadAI/Actors/SmartClickActor.swift new file mode 100644 index 0000000..a8dd284 --- /dev/null +++ b/TypeaheadAI/Actors/SmartClickActor.swift @@ -0,0 +1,98 @@ +// +// SmartClickActor.swift +// TypeaheadAI +// +// Created by Jeff Hara on 11/17/23. +// + +import AppKit +import Foundation +import os.log + +actor SmartClickActor: CanPerformOCR, CanSimulateCopy { + private let intentManager: IntentManager + private let clientManager: ClientManager + private let promptManager: QuickActionManager + private let modalManager: ModalManager + private let appContextManager: AppContextManager + + private let logger = Logger( + subsystem: "ai.typeahead.TypeaheadAI", + category: "SmartClickActor" + ) + + init( + intentManager: IntentManager, + clientManager: ClientManager, + promptManager: QuickActionManager, + modalManager: ModalManager, + appContextManager: AppContextManager + ) { + self.intentManager = intentManager + self.clientManager = clientManager + self.promptManager = promptManager + self.modalManager = modalManager + self.appContextManager = appContextManager + } + + func smartClick() async throws { + var appContext = try await self.appContextManager.getActiveAppInfo() + await self.modalManager.forceRefresh() + await self.modalManager.showModal(mousePos: appContext?.mousePos) + await NSApp.activate(ignoringOtherApps: true) + + // NOTE: Experimental: + // Get contents from Pasteboard (including Universal clipboard if phone is nearby) + var copiedText: String? = nil // NSPasteboard.general.string(forType: .string) + + // Attempt to copy whatever is currently selected, overrides previously copied text + do { + try await simulateCopy() + copiedText = NSPasteboard.general.string(forType: .string) + } catch { + // no-op: if nothing was copied, then don't do anything + } + + // Set the copied text as a new message + if let copiedText = copiedText { + await self.modalManager.setUserMessage(copiedText, messageType: .string) + } + + // Set the OCR'ed text + if let screenshot = appContext?.screenshotPath.flatMap({ NSImage(contentsOfFile: $0)?.toCGImage() }) { + let (ocrText, _) = try await performOCR(image: screenshot) + appContext?.ocrText = ocrText + } + + // Try to predict the user intent + let contextualIntents = self.intentManager.fetchContextualIntents(limit: 3, appContext: appContext) + await self.modalManager.setUserIntents(intents: contextualIntents) + + // Kick off async + Task { + // Set the OCR'ed text + if let screenshot = appContext?.screenshotPath.flatMap({ + NSImage(contentsOfFile: $0)?.toCGImage() + }) { + let (ocrText, _) = try await performOCR(image: screenshot) + appContext?.ocrText = ocrText + } + + if let intents = try await self.clientManager.suggestIntents( + id: UUID(), + username: NSUserName(), + userFullName: NSFullUserName(), + userObjective: self.promptManager.getActivePrompt(), + userBio: UserDefaults.standard.string(forKey: "bio") ?? "", + userLang: Locale.preferredLanguages.first ?? "", + copiedText: copiedText, + messages: self.modalManager.messages, + history: [], + appContext: appContext, + incognitoMode: !self.modalManager.online + ), !intents.intents.isEmpty { + await self.modalManager.appendUserIntents(intents: intents.intents) + } + } + } +} diff --git a/TypeaheadAI/AppContextManager.swift b/TypeaheadAI/AppContextManager.swift index bba1902..b478ea8 100644 --- a/TypeaheadAI/AppContextManager.swift +++ b/TypeaheadAI/AppContextManager.swift @@ -14,6 +14,7 @@ struct AppContext: Codable { let appName: String? let bundleIdentifier: String? let url: URL? + let mousePos: NSPoint var screenshotPath: String? = nil var ocrText: String? = nil } @@ -28,10 +29,12 @@ class AppContextManager { ) func getActiveAppInfo() async throws -> AppContext? { - guard let activeApp = NSWorkspace.shared.frontmostApplication else { + guard let activeApp = NSWorkspace.shared.menuBarOwningApplication else { return nil } + let mousePos = NSEvent.mouseLocation + let appName = activeApp.localizedName let bundleIdentifier = activeApp.bundleIdentifier self.logger.info("active app: \(bundleIdentifier ?? "")") @@ -49,17 +52,21 @@ class AppContextManager { appName: appName, bundleIdentifier: bundleIdentifier, url: strippedUrl, + mousePos: mousePos, screenshotPath: screenshotPath ) } } catch { self.logger.error("Failed to execute script: \(error.localizedDescription)") } - - return AppContext(appName: appName, bundleIdentifier: bundleIdentifier, url: nil) - } else { - return AppContext(appName: appName, bundleIdentifier: bundleIdentifier, url: nil) } + + return AppContext( + appName: appName, + bundleIdentifier: bundleIdentifier, + url: nil, + mousePos: mousePos + ) } private func stripQueryParameters(from url: URL) -> URL? { diff --git a/TypeaheadAI/AppDelegate.swift b/TypeaheadAI/AppDelegate.swift index 4980f1e..61fa53c 100644 --- a/TypeaheadAI/AppDelegate.swift +++ b/TypeaheadAI/AppDelegate.swift @@ -5,12 +5,16 @@ // Created by Jeff Hara on 11/5/23. // +import Cocoa +import CoreGraphics import Foundation import Supabase import SwiftUI import UserNotifications class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCenterDelegate { + var eventTap: CFMachPort? + func application(_ application: NSApplication, open urls: [URL]) { for url in urls { // Handle the URL @@ -33,10 +37,38 @@ class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCenterDele UNUserNotificationCenter.current().delegate = self NotificationCenter.default.post(name: .startOnboarding, object: nil) + + let eventMask = (1 << CGEventType.rightMouseDown.rawValue) + guard let tap = CGEvent.tapCreate( + tap: .cgSessionEventTap, + place: .headInsertEventTap, + options: .defaultTap, + eventsOfInterest: CGEventMask(eventMask), + callback: { (proxy: CGEventTapProxy, type: CGEventType, event: CGEvent, refcon: UnsafeMutableRawPointer?) in + if type == .rightMouseDown && event.flags.contains(.maskCommand) { + // Suppress the right-click and publish smart-click event + NotificationCenter.default.post(name: .smartClick, object: nil) + return nil + } + + return Unmanaged.passRetained(event) + }, + userInfo: nil + ) else { + print("Failed to create event tap") + exit(1) + } + + eventTap = tap + let runLoopSource = CFMachPortCreateRunLoopSource(kCFAllocatorDefault, tap, 0) + CFRunLoopAddSource(CFRunLoopGetCurrent(), runLoopSource, .commonModes) + CGEvent.tapEnable(tap: tap, enable: true) } func applicationWillTerminate(_ aNotification: Notification) { - // Insert code here to tear down your application + if let tap = eventTap { + CGEvent.tapEnable(tap: tap, enable: false) + } } // MARK: - UNUserNotificationCenterDelegate diff --git a/TypeaheadAI/AppState.swift b/TypeaheadAI/AppState.swift index b357d7b..85b0eb9 100644 --- a/TypeaheadAI/AppState.swift +++ b/TypeaheadAI/AppState.swift @@ -45,6 +45,7 @@ final class AppState: ObservableObject { private var specialCopyActor: SpecialCopyActor? = nil private var specialSaveActor: SpecialSaveActor? = nil private var specialOpenActor: SpecialOpenActor? = nil + private var smartClickActor: SmartClickActor? = nil // Monitors private let mouseEventMonitor = MouseEventMonitor() @@ -104,6 +105,13 @@ final class AppState: ObservableObject { modalManager: modalManager, appContextManager: appContextManager ) + self.smartClickActor = SmartClickActor( + intentManager: intentManager, + clientManager: clientManager, + promptManager: promptManager, + modalManager: modalManager, + appContextManager: appContextManager + ) // Set lazy params // TODO: Use a dependency injection framework or encapsulate these managers @@ -197,9 +205,6 @@ final class AppState: ObservableObject { mouseEventMonitor.onLeftMouseDown = { [weak self] in self?.mouseEventMonitor.mouseClicked = true - // If the toast window is open and the user clicks out, - // we can close the window. - // NOTE: If the user has chatted, then keep it open. if let window = self?.modalManager.toastWindow, (self?.modalManager.messages.count ?? 0) < 2 { let mouseLocation = NSEvent.mouseLocation @@ -215,6 +220,19 @@ final class AppState: ObservableObject { appVersion = getAppVersion() startCheckingForUpdates() + + NotificationCenter.default.addObserver( + self, + selector: #selector(self.smartClickWrapper(_:)), + name: .smartClick, + object: nil + ) + } + + @objc private func smartClickWrapper(_ notification: NSNotification) { + Task { + try await smartClickActor?.smartClick() + } } deinit { diff --git a/TypeaheadAI/ClientManager.swift b/TypeaheadAI/ClientManager.swift index ee637f7..ff9ac78 100644 --- a/TypeaheadAI/ClientManager.swift +++ b/TypeaheadAI/ClientManager.swift @@ -80,7 +80,7 @@ class ClientManager: ObservableObject { userObjective: String?, userBio: String, userLang: String, - copiedText: String, + copiedText: String?, messages: [Message], history: [Message]?, appContext: AppContext?, @@ -268,7 +268,7 @@ class ClientManager: ObservableObject { userObjective: String?, userBio: String, userLang: String, - copiedText: String, + copiedText: String?, messages: [Message], history: [Message]?, appContext: AppContext?, @@ -573,7 +573,7 @@ struct RequestPayload: Codable { var userObjective: String? var userBio: String var userLang: String - var copiedText: String + var copiedText: String? var messages: [Message]? var history: [Message]? var appContext: AppContext? diff --git a/TypeaheadAI/CrudManagers/IntentManager.swift b/TypeaheadAI/CrudManagers/IntentManager.swift index 655c60f..6a1bca4 100644 --- a/TypeaheadAI/CrudManagers/IntentManager.swift +++ b/TypeaheadAI/CrudManagers/IntentManager.swift @@ -34,7 +34,7 @@ class IntentManager { @MainActor func addIntentEntry( prompt: String, - copiedText: String, + copiedText: String?, appContext: AppContext? ) { let newEntry = IntentEntry(context: context) diff --git a/TypeaheadAI/Monitors/MouseClickMonitor.swift b/TypeaheadAI/Monitors/MouseClickMonitor.swift index 38e32cf..d89c167 100644 --- a/TypeaheadAI/Monitors/MouseClickMonitor.swift +++ b/TypeaheadAI/Monitors/MouseClickMonitor.swift @@ -25,7 +25,7 @@ class MouseEventMonitor { func startMonitoring() { logger.debug("Starting to monitor mouse clicks.") mouseEventMonitor = NSEvent.addGlobalMonitorForEvents( - matching: [.leftMouseDown, .leftMouseUp], + matching: [.leftMouseDown, .leftMouseUp, .rightMouseDown], handler: { [weak self] event in switch event.type { case .leftMouseDown: @@ -38,6 +38,8 @@ class MouseEventMonitor { self?.mouseDragged = true } } + case .rightMouseDown: + print("detected") default: break } diff --git a/TypeaheadAI/Notification+Extension.swift b/TypeaheadAI/Notification+Extension.swift index fafd314..f145bbc 100644 --- a/TypeaheadAI/Notification+Extension.swift +++ b/TypeaheadAI/Notification+Extension.swift @@ -8,6 +8,8 @@ import Foundation extension Notification.Name { + static let smartClick = Notification.Name("smartClick") + static let smartCopyPerformed = Notification.Name("smartCopyPerformed") static let startOnboarding = Notification.Name("startOnboarding") diff --git a/TypeaheadAI/Views/Onboarding/OnboardingView.swift b/TypeaheadAI/Views/Onboarding/OnboardingView.swift index d236c8a..2fd45f4 100644 --- a/TypeaheadAI/Views/Onboarding/OnboardingView.swift +++ b/TypeaheadAI/Views/Onboarding/OnboardingView.swift @@ -68,7 +68,8 @@ struct OnboardingView: View { let appContext = AppContext( appName: "TypeaheadAI", bundleIdentifier: "ai.typeahead.TypeaheadAI", - url: nil + url: nil, + mousePos: NSEvent.mouseLocation ) if self.intentManager.fetchContextualIntents( limit: 1, appContext: appContext @@ -79,7 +80,8 @@ struct OnboardingView: View { appContext: AppContext( appName: "TypeaheadAI", bundleIdentifier: "ai.typeahead.TypeaheadAI", - url: nil + url: nil, + mousePos: NSEvent.mouseLocation ) ) } diff --git a/TypeaheadAI/WindowManagers/ModalManager.swift b/TypeaheadAI/WindowManagers/ModalManager.swift index e8e33b0..12064be 100644 --- a/TypeaheadAI/WindowManagers/ModalManager.swift +++ b/TypeaheadAI/WindowManagers/ModalManager.swift @@ -413,7 +413,7 @@ class ModalManager: ObservableObject { } @MainActor - func showModal() { + func showModal(mousePos: NSPoint? = nil) { toastWindow?.close() // Create the visual effect view with frosted glass effect @@ -469,8 +469,14 @@ class ModalManager: ObservableObject { hostingView.trailingAnchor.constraint(equalTo: baseView.trailingAnchor) ]) - // Set the x, y coordinates and the size to the user's last preference or the center by default - if let x = toastX, let y = toastY { + if let mousePos = mousePos { + toastWindow?.setFrame(NSRect( + x: mousePos.x + 50, + y: mousePos.y + 50, + width: toastWidth, + height: toastHeight + ), display: true) + } else if let x = toastX, let y = toastY { toastWindow?.setFrame(NSRect(x: x, y: y, width: toastWidth, height: toastHeight), display: true) } else { toastWindow?.setFrame(NSRect(x: 0, y: 0, width: toastWidth, height: toastHeight), display: true)