From f864cbdd70a8a2607c4eec567934dde7692709f2 Mon Sep 17 00:00:00 2001 From: kchro3 Date: Thu, 14 Dec 2023 23:17:39 -0800 Subject: [PATCH] experiment w/ different ui performance changes --- TypeaheadAI.xcodeproj/project.pbxproj | 8 + TypeaheadAI/Actors/SpecialCopyActor.swift | 18 +- TypeaheadAI/Actors/SpecialOpenActor.swift | 5 +- TypeaheadAI/AppContextManager.swift | 12 +- TypeaheadAI/ClientManager.swift | 27 +- .../Extensions/AXUIElement+Extension.swift | 7 + .../FunctionManager+PerformUIAction.swift | 29 +- TypeaheadAI/LayoutManager.swift | 14 + TypeaheadAI/Models/AppContext.swift | 2 +- TypeaheadAI/Models/UIElement.swift | 47 ++- TypeaheadAI/Traits/CanGetUIElements.swift | 15 +- TypeaheadAI/Traits/CanSimulateCopy.swift | 4 +- TypeaheadAI/Traits/CanSimulateEnter.swift | 30 ++ .../Views/Modal/Message/ChatBubble.swift | 3 - .../QuickActions/QuickActionDetails.swift | 268 ++++++++---------- TypeaheadAI/WindowManagers/ModalManager.swift | 3 +- 16 files changed, 301 insertions(+), 191 deletions(-) create mode 100644 TypeaheadAI/LayoutManager.swift create mode 100644 TypeaheadAI/Traits/CanSimulateEnter.swift diff --git a/TypeaheadAI.xcodeproj/project.pbxproj b/TypeaheadAI.xcodeproj/project.pbxproj index 95388a9..d8d3ffd 100644 --- a/TypeaheadAI.xcodeproj/project.pbxproj +++ b/TypeaheadAI.xcodeproj/project.pbxproj @@ -102,6 +102,7 @@ 2BCB017D2A9EF9A6009F9FAC /* RequestStatus.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BCB017C2A9EF9A6009F9FAC /* RequestStatus.swift */; }; 2BCF84352A9DD90F00359841 /* HistoryManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BCF84342A9DD90F00359841 /* HistoryManager.swift */; }; 2BCF843A2A9DE6DA00359841 /* GeneralSettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BCF84392A9DE6DA00359841 /* GeneralSettingsView.swift */; }; + 2BD3821C2B2B7A0100F96C19 /* LayoutManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BD3821B2B2B7A0100F96C19 /* LayoutManager.swift */; }; 2BDA45C32ABEE840006128BC /* MessageView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BDA45C22ABEE840006128BC /* MessageView.swift */; }; 2BDDB9892B27DDE100D52BF0 /* SwiftSoup in Frameworks */ = {isa = PBXBuildFile; productRef = 2BDDB9882B27DDE100D52BF0 /* SwiftSoup */; }; 2BDDB98B2B27DDFF00D52BF0 /* String+XMLMarkdown.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BDDB98A2B27DDFF00D52BF0 /* String+XMLMarkdown.swift */; }; @@ -110,6 +111,7 @@ 2BDDB9932B28341C00D52BF0 /* FunctionManager+OpenApplication.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BDDB9922B28341C00D52BF0 /* FunctionManager+OpenApplication.swift */; }; 2BDDB9952B2834B100D52BF0 /* FunctionManager+OpenURL.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BDDB9942B2834B100D52BF0 /* FunctionManager+OpenURL.swift */; }; 2BDDB9972B28352800D52BF0 /* FunctionManager+PerformUIAction.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BDDB9962B28352800D52BF0 /* FunctionManager+PerformUIAction.swift */; }; + 2BDDB9992B2AAE8000D52BF0 /* CanSimulateEnter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BDDB9982B2AAE8000D52BF0 /* CanSimulateEnter.swift */; }; 2BE0EC222AA0956C00E47C52 /* ModalView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BE0EC212AA0956C00E47C52 /* ModalView.swift */; }; 2BE0EC272AA17F9100E47C52 /* MouseClickMonitor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BE0EC262AA17F9100E47C52 /* MouseClickMonitor.swift */; }; 2BE7BB882B258B4C00164F88 /* CanGetUIElements.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2BE7BB872B258B4C00164F88 /* CanGetUIElements.swift */; }; @@ -233,6 +235,7 @@ 2BCB017C2A9EF9A6009F9FAC /* RequestStatus.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RequestStatus.swift; sourceTree = ""; }; 2BCF84342A9DD90F00359841 /* HistoryManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HistoryManager.swift; sourceTree = ""; }; 2BCF84392A9DE6DA00359841 /* GeneralSettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeneralSettingsView.swift; sourceTree = ""; }; + 2BD3821B2B2B7A0100F96C19 /* LayoutManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LayoutManager.swift; sourceTree = ""; }; 2BDA45C22ABEE840006128BC /* MessageView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MessageView.swift; sourceTree = ""; }; 2BDDB98A2B27DDFF00D52BF0 /* String+XMLMarkdown.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "String+XMLMarkdown.swift"; sourceTree = ""; }; 2BDDB98C2B282AFF00D52BF0 /* AppManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppManager.swift; sourceTree = ""; }; @@ -240,6 +243,7 @@ 2BDDB9922B28341C00D52BF0 /* FunctionManager+OpenApplication.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "FunctionManager+OpenApplication.swift"; sourceTree = ""; }; 2BDDB9942B2834B100D52BF0 /* FunctionManager+OpenURL.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "FunctionManager+OpenURL.swift"; sourceTree = ""; }; 2BDDB9962B28352800D52BF0 /* FunctionManager+PerformUIAction.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "FunctionManager+PerformUIAction.swift"; sourceTree = ""; }; + 2BDDB9982B2AAE8000D52BF0 /* CanSimulateEnter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CanSimulateEnter.swift; sourceTree = ""; }; 2BE0EC212AA0956C00E47C52 /* ModalView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModalView.swift; sourceTree = ""; }; 2BE0EC242AA17DB600E47C52 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 2BE0EC262AA17F9100E47C52 /* MouseClickMonitor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MouseClickMonitor.swift; sourceTree = ""; }; @@ -382,6 +386,7 @@ 2B8CD4A12B09C3A8003E0589 /* CanScreenshot.swift */, 2B11FCE02B0C468100325F38 /* CanSimulateClose.swift */, 2B27450F2AB03A3D00F37D3E /* CanSimulateCopy.swift */, + 2BDDB9982B2AAE8000D52BF0 /* CanSimulateEnter.swift */, 2B3792302AB83739008D812F /* CanSimulatePaste.swift */, 2B11FCE22B0C47FA00325F38 /* CanSimulateSelectAll.swift */, 2BE7BB872B258B4C00164F88 /* CanGetUIElements.swift */, @@ -451,6 +456,7 @@ 2BE0EC242AA17DB600E47C52 /* Info.plist */, 2BA7F0862A9ABBA8003D38BA /* TypeaheadAI.entitlements */, 2B3435022B1EA33500423EE8 /* String+HTMLParser.swift */, + 2BD3821B2B2B7A0100F96C19 /* LayoutManager.swift */, ); path = TypeaheadAI; sourceTree = ""; @@ -770,6 +776,7 @@ 2BA7F0B32A9ABCBF003D38BA /* MenuView.swift in Sources */, 2B92BDBB2AA3D10800E65CFA /* ModalManager.swift in Sources */, 2BCF843A2A9DE6DA00359841 /* GeneralSettingsView.swift in Sources */, + 2BD3821C2B2B7A0100F96C19 /* LayoutManager.swift in Sources */, 2B0116F32AF98D68000C78E1 /* LoggedOutAccountView.swift in Sources */, 2B33D87D2AAC3330001193A2 /* ProfileView.swift in Sources */, 2BA7F0B52A9ABCD7003D38BA /* QuickActionManager.swift in Sources */, @@ -791,6 +798,7 @@ 2B3792312AB83739008D812F /* CanSimulatePaste.swift in Sources */, 2B8CD4942B05D278003E0589 /* CanPerformOCR.swift in Sources */, 2B11FCE32B0C47FA00325F38 /* CanSimulateSelectAll.swift in Sources */, + 2BDDB9992B2AAE8000D52BF0 /* CanSimulateEnter.swift in Sources */, 2B1F20462B1987E400152F13 /* ConversationManager.swift in Sources */, 2B3FAC232AAAF44D00B2D405 /* LlamaWrapper.swift in Sources */, 2BDA45C32ABEE840006128BC /* MessageView.swift in Sources */, diff --git a/TypeaheadAI/Actors/SpecialCopyActor.swift b/TypeaheadAI/Actors/SpecialCopyActor.swift index d0d6497..9e427fb 100644 --- a/TypeaheadAI/Actors/SpecialCopyActor.swift +++ b/TypeaheadAI/Actors/SpecialCopyActor.swift @@ -9,7 +9,7 @@ import Foundation import SwiftUI import os.log -actor SpecialCopyActor: CanSimulateCopy, CanPerformOCR { +actor SpecialCopyActor: CanSimulateCopy, CanPerformOCR, CanGetUIElements { private let intentManager: IntentManager private let historyManager: HistoryManager private let clientManager: ClientManager @@ -18,6 +18,7 @@ actor SpecialCopyActor: CanSimulateCopy, CanPerformOCR { private let appContextManager: AppContextManager @AppStorage("numSmartCopies") var numSmartCopies: Int? + @AppStorage("isAutopilotEnabled") private var isAutopilotEnabled: Bool = true private let logger = Logger( subsystem: "ai.typeahead.TypeaheadAI", @@ -45,7 +46,7 @@ actor SpecialCopyActor: CanSimulateCopy, CanPerformOCR { try await self.simulateCopy() // Clear the current state - try await self.modalManager.forceRefresh() + await self.modalManager.forceRefresh() await self.modalManager.showModal() var messageType: MessageType = .string @@ -72,6 +73,19 @@ actor SpecialCopyActor: CanSimulateCopy, CanPerformOCR { appInfo.appContext?.ocrText = ocrText } + // Serialize the UIElement + if isAutopilotEnabled { + let (uiElement, elementMap) = getUIElements(appContext: appInfo.appContext) + if let serializedUIElement = uiElement?.serialize( + excludedRoles: ["AXImage"], + excludedActions: ["AXShowMenu", "AXScrollToVisible", "AXCancel", "AXRaise"] + ) { + print(serializedUIElement) + appInfo.appContext?.serializedUIElement = serializedUIElement + appInfo.elementMap = elementMap + } + } + // Try to predict the user intent let contextualIntents = self.intentManager.fetchContextualIntents(limit: 10, appContext: appInfo.appContext) await self.modalManager.setUserIntents(intents: contextualIntents) diff --git a/TypeaheadAI/Actors/SpecialOpenActor.swift b/TypeaheadAI/Actors/SpecialOpenActor.swift index 0dee6d5..cd4b559 100644 --- a/TypeaheadAI/Actors/SpecialOpenActor.swift +++ b/TypeaheadAI/Actors/SpecialOpenActor.swift @@ -8,6 +8,7 @@ import AppKit import CoreServices import Foundation +import SwiftUI import os.log actor SpecialOpenActor: CanPerformOCR, CanGetUIElements { @@ -39,11 +40,9 @@ actor SpecialOpenActor: CanPerformOCR, CanGetUIElements { func specialOpen(forceRefresh: Bool = false) async throws { var appInfo = try await self.appContextManager.getActiveAppInfo() -// hack() - if forceRefresh { self.logger.debug("special new") - try await self.modalManager.forceRefresh() + await self.modalManager.forceRefresh() await self.modalManager.showModal() await NSApp.activate(ignoringOtherApps: true) } else { diff --git a/TypeaheadAI/AppContextManager.swift b/TypeaheadAI/AppContextManager.swift index c66fa66..fee196a 100644 --- a/TypeaheadAI/AppContextManager.swift +++ b/TypeaheadAI/AppContextManager.swift @@ -8,6 +8,7 @@ import AppKit import Foundation import SwiftUI +import UserNotifications import Vision import os.log @@ -39,16 +40,7 @@ class AppContextManager: CanFetchAppContext, CanScreenshot, CanGetUIElements { // NOTE: Take screenshot and store reference. We can apply the OCR when we make the network request. appContext.screenshotPath = try await screenshot() appContext.url = await getUrl(bundleIdentifier: appContext.bundleIdentifier) - if isAutopilotEnabled { - let (uiElement, elementMap) = getUIElements(appContext: appContext) - if let serializedUIElement = uiElement?.serialize(excludedActions: ["AXShowMenu", "AXScrollToVisible", "AXCancel", "AXRaise"]) { - appContext.serializedUIElement = serializedUIElement - } - - return AppInfo(appContext: appContext, elementMap: elementMap, apps: appManager.getApps()) - } else { - return AppInfo(appContext: appContext, elementMap: ElementMap(), apps: appManager.getApps()) - } + return AppInfo(appContext: appContext, elementMap: ElementMap(), apps: appManager.getApps()) } private func getUrl(bundleIdentifier: String?) async -> URL? { diff --git a/TypeaheadAI/ClientManager.swift b/TypeaheadAI/ClientManager.swift index 2df7d61..f39c054 100644 --- a/TypeaheadAI/ClientManager.swift +++ b/TypeaheadAI/ClientManager.swift @@ -12,7 +12,7 @@ import os.log import SwiftUI import Supabase -class ClientManager: ObservableObject { +class ClientManager: ObservableObject, CanGetUIElements { var llamaModelManager: LlamaModelManager? = nil var promptManager: QuickActionManager? = nil var appContextManager: AppContextManager? = nil @@ -22,7 +22,7 @@ class ClientManager: ObservableObject { private let session: URLSession - private let version: String = "v9" + private let version: String = "v10" private let validFinishReasons: [String] = [ "stop", "function_call", @@ -180,18 +180,34 @@ class ClientManager: ObservableObject { func refine( messages: [Message], incognitoMode: Bool, - quickAction: QuickAction? = nil, - timeout: TimeInterval = 60, + quickActionId: UUID? = nil, + timeout: TimeInterval = 120, streamHandler: @escaping (Result, AppInfo?) async -> Void, completion: @escaping (Result, AppInfo?) async -> Void ) async throws { self.logger.info("incognito: \(incognitoMode)") - let appInfo = try await appContextManager?.getActiveAppInfo() + var appInfo = try await appContextManager?.getActiveAppInfo() + + // Serialize the UIElement + if isAutopilotEnabled { + let (uiElement, elementMap) = getUIElements(appContext: appInfo?.appContext) + if let serializedUIElement = uiElement?.serialize( + excludedRoles: ["AXImage"], + excludedActions: ["AXShowMenu", "AXScrollToVisible", "AXCancel", "AXRaise"] + ) { + appInfo?.appContext?.serializedUIElement = serializedUIElement + appInfo?.elementMap = elementMap + } + } + if let (key, _) = cached, let data = key.data(using: .utf8), let payload = try? JSONDecoder().decode(RequestPayload.self, from: data), let appContext = appInfo?.appContext { var history: [Message]? = nil + + // NOTE: Need to fetch again in case the Quick Action has been edited + let quickAction: QuickAction? = quickActionId.flatMap { self.promptManager?.getById($0) } if let quickAction = quickAction { history = self.historyManager?.fetchHistoryEntriesAsMessages(limit: 10, appContext: payload.appContext, quickActionID: quickAction.id) @@ -536,6 +552,7 @@ class ClientManager: ObservableObject { var messageCopy = originalMessage messageCopy.appContext?.screenshotPath = nil messageCopy.appContext?.ocrText = nil + messageCopy.appContext?.serializedUIElement = nil return messageCopy } } diff --git a/TypeaheadAI/Extensions/AXUIElement+Extension.swift b/TypeaheadAI/Extensions/AXUIElement+Extension.swift index 553abee..8fbc17a 100644 --- a/TypeaheadAI/Extensions/AXUIElement+Extension.swift +++ b/TypeaheadAI/Extensions/AXUIElement+Extension.swift @@ -89,4 +89,11 @@ extension AXUIElement { return actions } + + func isFocused() -> Bool { + guard let value = self.value(forAttribute: kAXFocusedAttribute) as? Bool else { + return false + } + return value + } } diff --git a/TypeaheadAI/Functions/FunctionManager+PerformUIAction.swift b/TypeaheadAI/Functions/FunctionManager+PerformUIAction.swift index 99615ae..ce1bcc3 100644 --- a/TypeaheadAI/Functions/FunctionManager+PerformUIAction.swift +++ b/TypeaheadAI/Functions/FunctionManager+PerformUIAction.swift @@ -13,6 +13,7 @@ struct Action: Identifiable, Codable { let action: String let narration: String let inputText: String? + let pressEnter: Bool? } extension FunctionCall { @@ -27,7 +28,7 @@ extension FunctionCall { } } -extension FunctionManager { +extension FunctionManager: CanSimulateEnter { func performUIAction(_ functionCall: FunctionCall, appInfo: AppInfo?, modalManager: ModalManager) async throws { let appContext = appInfo?.appContext @@ -47,6 +48,7 @@ extension FunctionManager { ) try await Task.sleep(for: .seconds(3)) + try Task.checkCancellation() await modalManager.closeModal() @@ -57,7 +59,8 @@ extension FunctionManager { } for (index, action) in actions.enumerated() { - print(index, action) + try Task.checkCancellation() + guard let axElement = elementMap[action.id] else { // TERMINATE on invalid action await modalManager.showModal() @@ -67,6 +70,7 @@ extension FunctionManager { _ = AXUIElementPerformAction(axElement, "AXScrollToVisible" as CFString) try await Task.sleep(for: .milliseconds(100)) + try Task.checkCancellation() var result: AXError? = nil if action.action == "" { @@ -87,7 +91,8 @@ extension FunctionManager { result = AXUIElementPerformAction(axElement, action.action as CFString) } - try await Task.sleep(for: .seconds(1)) + try await Task.sleep(for: .milliseconds(100)) + try Task.checkCancellation() guard result == .success else { // TERMINATE on failure @@ -120,21 +125,31 @@ extension FunctionManager { } else { NSPasteboard.general.clearContents() NSPasteboard.general.setString(inputText, forType: .string) - try await Task.sleep(for: .seconds(1)) + try await Task.sleep(for: .milliseconds(100)) + try Task.checkCancellation() try await simulatePaste() + + if action.pressEnter ?? false { + try await simulateEnter() + } } } else { NSPasteboard.general.clearContents() NSPasteboard.general.setString(inputText, forType: .string) - try await Task.sleep(for: .seconds(1)) + try await Task.sleep(for: .milliseconds(100)) + try Task.checkCancellation() try await simulateSelectAll() try await simulatePaste() + + if action.pressEnter ?? false { + try await simulateEnter() + } } + } else { + try await Task.sleep(for: .milliseconds(100)) } - - try await Task.sleep(for: .seconds(1)) } // NOTE: Probably a good idea, but it doesn't work well in practice... diff --git a/TypeaheadAI/LayoutManager.swift b/TypeaheadAI/LayoutManager.swift new file mode 100644 index 0000000..c83aaf5 --- /dev/null +++ b/TypeaheadAI/LayoutManager.swift @@ -0,0 +1,14 @@ +// +// LayoutManager.swift +// TypeaheadAI +// +// Created by Jeff Hara on 12/14/23. +// + +import Foundation + +class LayoutManager { + init() { + + } +} diff --git a/TypeaheadAI/Models/AppContext.swift b/TypeaheadAI/Models/AppContext.swift index 6f532f4..58c8e53 100644 --- a/TypeaheadAI/Models/AppContext.swift +++ b/TypeaheadAI/Models/AppContext.swift @@ -10,9 +10,9 @@ import Foundation struct AppContext: Codable, Equatable { let appName: String? let bundleIdentifier: String? + let pid: pid_t? var url: URL? = nil var screenshotPath: String? = nil var ocrText: String? = nil - var pid: pid_t? = nil var serializedUIElement: String? = nil } diff --git a/TypeaheadAI/Models/UIElement.swift b/TypeaheadAI/Models/UIElement.swift index a4c76e3..421f266 100644 --- a/TypeaheadAI/Models/UIElement.swift +++ b/TypeaheadAI/Models/UIElement.swift @@ -18,8 +18,10 @@ struct UIElement: Identifiable, Codable, Equatable { let link: URL? let point: CGPoint? let size: CGSize? + let isFocused: Bool let actions: [String] + let parentRole: String? let children: [UIElement] var shortId: String { @@ -38,6 +40,7 @@ extension UIElement { self.role = role self.point = element.pointValue(forAttribute: kAXPositionAttribute) self.size = element.sizeValue(forAttribute: kAXSizeAttribute) + self.isFocused = element.isFocused() if let titleAttr = element.stringValue(forAttribute: kAXTitleAttribute), !titleAttr.isEmpty { self.title = titleAttr @@ -65,6 +68,8 @@ extension UIElement { self.link = element.value(forAttribute: kAXURLAttribute) as? URL self.actions = element.actions() + + self.parentRole = element.parent()?.stringValue(forAttribute: kAXRoleAttribute) if let children = element.value(forAttribute: kAXChildrenAttribute) as? [AXUIElement] { self.children = children.compactMap { UIElement(from: $0, callback: callback) } } else { @@ -83,10 +88,11 @@ extension UIElement { isVisible: Bool = true, isIndexed: Bool = true, showActions: Bool = true, - excludedActions: [String]? = nil, - showGroups: Bool = false + excludedRoles: [String]? = nil, + excludedActions: [String]? = nil ) -> String? { - guard showGroups || self.role != "AXGroup" else { + if self.role == "AXGroup", self.parentRole == "AXGroup" { + // Collapse nested AXGroups var line = "" for child in self.children { if let childLine = child.serialize( @@ -94,8 +100,29 @@ extension UIElement { isVisible: isVisible, isIndexed: isIndexed, showActions: showActions, - excludedActions: excludedActions, - showGroups: showGroups + excludedRoles: excludedRoles, + excludedActions: excludedActions + ), !childLine.isEmpty { + if line.isEmpty { + line = childLine + } else { + line += "\n\(childLine)" + } + } + } + return line + } + + guard !(excludedRoles ?? []).contains(self.role) else { + var line = "" + for child in self.children { + if let childLine = child.serialize( + indent: indent, + isVisible: isVisible, + isIndexed: isIndexed, + showActions: showActions, + excludedRoles: excludedRoles, + excludedActions: excludedActions ), !childLine.isEmpty { if line.isEmpty { line = childLine @@ -149,6 +176,10 @@ extension UIElement { } } + if self.isFocused { + text += ", (in focus)" + } + var line = "" if isIndexed { line += "\(indentation)\(self.shortId): \(text)" @@ -159,7 +190,7 @@ extension UIElement { if let excludedActions = excludedActions { let actions = self.actions.filter { !excludedActions.contains($0) } if !actions.isEmpty { - line += ", actions: \(self.actions)" + line += ", actions: \(actions)" } } else { line += ", actions: \(self.actions)" @@ -172,8 +203,8 @@ extension UIElement { isVisible: isVisible, isIndexed: isIndexed, showActions: showActions, - excludedActions: excludedActions, - showGroups: showGroups + excludedRoles: excludedRoles, + excludedActions: excludedActions ), !childLine.isEmpty { line += "\n\(childLine)" } diff --git a/TypeaheadAI/Traits/CanGetUIElements.swift b/TypeaheadAI/Traits/CanGetUIElements.swift index e2487c7..0ec8662 100644 --- a/TypeaheadAI/Traits/CanGetUIElements.swift +++ b/TypeaheadAI/Traits/CanGetUIElements.swift @@ -9,11 +9,15 @@ import AppKit import Foundation protocol CanGetUIElements { + func getRootElement(appContext: AppContext?) -> AXUIElement? + func getUIElements(appContext: AppContext?) -> (UIElement?, ElementMap) + + func getUIElements(element: AXUIElement?) -> (UIElement?, ElementMap) } extension CanGetUIElements { - func getUIElements(appContext: AppContext?) -> (UIElement?, ElementMap) { + func getRootElement(appContext: AppContext?) -> AXUIElement? { var element: AXUIElement? = nil if let appContext = appContext, let pid = appContext.pid { element = AXUIElementCreateApplication(pid) @@ -21,6 +25,10 @@ extension CanGetUIElements { element = AXUIElementCreateSystemWide() } + return element + } + + func getUIElements(element: AXUIElement?) -> (UIElement?, ElementMap) { var elementMap = ElementMap() if let element = element, let uiElement = UIElement(from: element, callback: { uuid, element in elementMap[uuid] = element @@ -30,4 +38,9 @@ extension CanGetUIElements { return (nil, ElementMap()) } } + + func getUIElements(appContext: AppContext?) -> (UIElement?, ElementMap) { + let element = getRootElement(appContext: appContext) + return getUIElements(element: element) + } } diff --git a/TypeaheadAI/Traits/CanSimulateCopy.swift b/TypeaheadAI/Traits/CanSimulateCopy.swift index 057fd21..663546a 100644 --- a/TypeaheadAI/Traits/CanSimulateCopy.swift +++ b/TypeaheadAI/Traits/CanSimulateCopy.swift @@ -21,9 +21,9 @@ extension CanSimulateCopy { func simulateCopy() async throws { // Post a Command-C keystroke let source = CGEventSource(stateID: .hidSystemState)! - let cmdCDown = CGEvent(keyboardEventSource: source, virtualKey: 0x08, keyDown: true)! // c key + let cmdCDown = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(kVK_ANSI_C), keyDown: true)! cmdCDown.flags = [.maskCommand] - let cmdCUp = CGEvent(keyboardEventSource: source, virtualKey: 0x08, keyDown: false)! // c key + let cmdCUp = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(kVK_ANSI_C), keyDown: false)! cmdCUp.flags = [.maskCommand] let changeCount = NSPasteboard.general.changeCount diff --git a/TypeaheadAI/Traits/CanSimulateEnter.swift b/TypeaheadAI/Traits/CanSimulateEnter.swift new file mode 100644 index 0000000..551f161 --- /dev/null +++ b/TypeaheadAI/Traits/CanSimulateEnter.swift @@ -0,0 +1,30 @@ +// +// CanSimulateEnter.swift +// TypeaheadAI +// +// Created by Jeff Hara on 12/13/23. +// + +import Foundation +import Carbon.HIToolbox + +protocol CanSimulateEnter { + func simulateEnter() async throws +} + +extension CanSimulateEnter { + func simulateEnter() async throws { + // Post a Enter keystroke + let source = CGEventSource(stateID: .hidSystemState)! + let keyDown = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(kVK_Return), keyDown: true)! + keyDown.flags = [] + + let keyUp = CGEvent(keyboardEventSource: source, virtualKey: CGKeyCode(kVK_Return), keyDown: false)! + keyUp.flags = [] + + keyDown.post(tap: .cghidEventTap) + try await Task.sleep(for: .milliseconds(100)) + keyUp.post(tap: .cghidEventTap) + try await Task.sleep(for: .milliseconds(100)) + } +} diff --git a/TypeaheadAI/Views/Modal/Message/ChatBubble.swift b/TypeaheadAI/Views/Modal/Message/ChatBubble.swift index fb9cefa..e6decc6 100644 --- a/TypeaheadAI/Views/Modal/Message/ChatBubble.swift +++ b/TypeaheadAI/Views/Modal/Message/ChatBubble.swift @@ -41,10 +41,7 @@ struct ChatBubble: View where Content: View { @ViewBuilder var userMessage: some View { HStack(alignment: .bottom) { - Spacer() - userButtons - .padding(.leading, 10) content() .clipShape(ChatBubbleShape(direction: direction)) diff --git a/TypeaheadAI/Views/Settings/QuickActions/QuickActionDetails.swift b/TypeaheadAI/Views/Settings/QuickActions/QuickActionDetails.swift index dfc8861..b7f6d3f 100644 --- a/TypeaheadAI/Views/Settings/QuickActions/QuickActionDetails.swift +++ b/TypeaheadAI/Views/Settings/QuickActions/QuickActionDetails.swift @@ -56,181 +56,154 @@ struct QuickActionDetails: View { @ViewBuilder var readWriteView: some View { - ScrollView { - VStack { - // Read-Write Header - HStack { - Button(action: { - isEditing = false - onDelete?() - }, label: { - HStack { - Image(systemName: "trash.fill") - .foregroundStyle(.white) - Text("Delete") - .foregroundStyle(.white) - } - .padding(.vertical, 5) - .padding(.horizontal, 10) - .background(RoundedRectangle(cornerRadius: 15) - .fill(.red)) - }) - .buttonStyle(.plain) - - Spacer() - - Button(action: { - isEditing = false - }, label: { - Text("Cancel") - .padding(.vertical, 5) - .padding(.horizontal, 10) - .background(RoundedRectangle(cornerRadius: 15) - .fill(colorScheme == .dark ? .black.opacity(0.2) : .secondary.opacity(0.15)) - ) - }) - .buttonStyle(.plain) - - Button(action: { - isEditing = false - onSubmit?( - mutableLabel, - mutableDetails - ) - }, label: { - Text("Save") - .foregroundStyle(.white) - .padding(.vertical, 5) - .padding(.horizontal, 10) - .background(RoundedRectangle(cornerRadius: 15) - .fill(Color.accentColor)) - }) - .buttonStyle(.plain) - } - .frame(maxWidth: .infinity) - + VStack { + ScrollView { // Body VStack(alignment: .leading) { + // Details + Text("Plan") + .font(.title3) + .foregroundStyle(Color.accentColor) - // Title - HStack { - Text("Name") - .frame(width: descWidth, alignment: .trailing) - - CustomTextField( - text: $mutableLabel, - placeholderText: quickAction.prompt ?? "Name of command", - autoCompleteSuggestions: [], - onEnter: { _ in }, - flushOnEnter: false - ) - .lineLimit(1) + TextEditor(text: $mutableDetails) + .font(.system(.body)) + .scrollContentBackground(.hidden) + .lineLimit(10) .padding(.vertical, 5) .padding(.horizontal, 10) - .background(RoundedRectangle(cornerRadius: 15) + .background(RoundedRectangle(cornerRadius: 10) .fill(colorScheme == .dark ? .black.opacity(0.2) : .secondary.opacity(0.15)) ) - } - - // Details - HStack { - Text("Prompt") - .frame(width: descWidth, alignment: .trailing) - - TextEditor(text: $mutableDetails) - .font(.system(.body)) - .scrollContentBackground(.hidden) - .lineLimit(nil) - .padding(.vertical, 5) - .padding(.horizontal, 10) - .background(RoundedRectangle(cornerRadius: 10) - .fill(colorScheme == .dark ? .black.opacity(0.2) : .secondary.opacity(0.15)) - ) - .frame(minHeight: 50) - } - - Divider() + .frame(minHeight: 60) Text("Examples") .font(.title3) .foregroundStyle(Color.accentColor) // Examples - VStack { - Table(history, selection: $selectedRow) { - TableColumn("Copied Text") { entry in - Text(entry.copiedText ?? "none") - } - TableColumn("Pasted Text") { entry in - Text(entry.pastedResponse ?? "none") - } + Table(history, selection: $selectedRow) { + TableColumn("Copied Text") { entry in + Text(entry.copiedText ?? "none") + } + TableColumn("Pasted Text") { entry in + Text(entry.pastedResponse ?? "none") + } + } + .contextMenu(menuItems: { + Button { + selectedRow = nil + isSheetPresented = true + } label: { + Text("Add New Example") } - .contextMenu(menuItems: { + + if let selectedRow = selectedRow, let _ = selectedRow { Button { - selectedRow = nil isSheetPresented = true } label: { - Text("Add New Example") + Text("Edit") } - if let selectedRow = selectedRow, let _ = selectedRow { - Button { - isSheetPresented = true - } label: { - Text("Edit") - } - - Button { - confirmDelete = true - } label: { - Text("Delete") - } + Button { + confirmDelete = true + } label: { + Text("Delete") } - }) - .sheet(isPresented: $isSheetPresented, onDismiss: { - isSheetPresented = false - }) { - QuickActionExampleForm( - selectedRow: selectedRow, - onFetch: self.fetchHistoryEntry, - onSubmit: { (copiedText, pastedText) in - self.upsertExample( - copiedText: copiedText, - pastedText: pastedText - ) - isSheetPresented = false - }, - onCancel: { - isSheetPresented = false - } - ) } - .onDeleteCommand(perform: { - confirmDelete = true - }) - .alert(isPresented: $confirmDelete) { - Alert( - title: Text("Are you sure you want to delete this example?"), - message: Text("If you delete this, TypeaheadAI will forget about this example, and this action cannot be undone."), - primaryButton: .destructive(Text("Delete")) { - deleteSelectedRow() - }, - secondaryButton: .cancel() - ) - } - .cornerRadius(10) + }) + .sheet(isPresented: $isSheetPresented, onDismiss: { + isSheetPresented = false + }) { + QuickActionExampleForm( + selectedRow: selectedRow, + onFetch: self.fetchHistoryEntry, + onSubmit: { (copiedText, pastedText) in + self.upsertExample( + copiedText: copiedText, + pastedText: pastedText + ) + isSheetPresented = false + }, + onCancel: { + isSheetPresented = false + } + ) } + .onDeleteCommand(perform: { + confirmDelete = true + }) + .alert(isPresented: $confirmDelete) { + Alert( + title: Text("Are you sure you want to delete this example?"), + message: Text("If you delete this, TypeaheadAI will forget about this example, and this action cannot be undone."), + primaryButton: .destructive(Text("Delete")) { + deleteSelectedRow() + }, + secondaryButton: .cancel() + ) + } + .cornerRadius(10) .frame(maxWidth: .infinity, minHeight: 150, maxHeight: .infinity, alignment: .leading) } - .padding(10) .frame( maxWidth: .infinity, maxHeight: .infinity, alignment: .leading ) } - .padding(15) + + // Read-Write Footer + HStack { + Button(action: { + isEditing = false + onDelete?() + }, label: { + HStack { + Image(systemName: "trash.fill") + .foregroundStyle(.white) + Text("Delete") + .foregroundStyle(.white) + } + .padding(.vertical, 5) + .padding(.horizontal, 10) + .background(RoundedRectangle(cornerRadius: 15) + .fill(.red)) + }) + .buttonStyle(.plain) + + Spacer() + + Button(action: { + isEditing = false + }, label: { + Text("Cancel") + .padding(.vertical, 5) + .padding(.horizontal, 10) + .background(RoundedRectangle(cornerRadius: 15) + .fill(colorScheme == .dark ? .black.opacity(0.2) : .secondary.opacity(0.15)) + ) + }) + .buttonStyle(.plain) + + Button(action: { + isEditing = false + onSubmit?( + mutableLabel, + mutableDetails + ) + }, label: { + Text("Save") + .foregroundStyle(.white) + .padding(.vertical, 5) + .padding(.horizontal, 10) + .background(RoundedRectangle(cornerRadius: 15) + .fill(Color.accentColor)) + }) + .buttonStyle(.plain) + } + .frame(maxWidth: .infinity) } + .padding(15) } @ViewBuilder @@ -268,7 +241,7 @@ struct QuickActionDetails: View { VStack(alignment: .leading) { // Details VStack(alignment: .leading, spacing: 5) { - Text("Prompt") + Text("Plan") .foregroundStyle(Color.accentColor) Text(quickAction.details ?? "") } @@ -298,15 +271,14 @@ struct QuickActionDetails: View { .fill(colorScheme == .dark ? .black.opacity(0.2) : .secondary.opacity(0.15)) ) } - .padding(10) .frame( maxWidth: .infinity, maxHeight: .infinity, alignment: .leading ) } - .padding(15) - } + } + .padding(15) } private func fetchHistoryEntry(uuid: UUID) -> HistoryEntry? { diff --git a/TypeaheadAI/WindowManagers/ModalManager.swift b/TypeaheadAI/WindowManagers/ModalManager.swift index 2fee473..53381ba 100644 --- a/TypeaheadAI/WindowManagers/ModalManager.swift +++ b/TypeaheadAI/WindowManagers/ModalManager.swift @@ -75,6 +75,7 @@ class ModalManager: ObservableObject { @MainActor func cancelTasks() { + self.isPending = false self.clientManager?.cancelStreamingTask() NotificationCenter.default.post( @@ -529,7 +530,7 @@ class ModalManager: ObservableObject { try await self.clientManager?.refine( messages: self.messages, incognitoMode: !online, - quickAction: quickAction, + quickActionId: quickAction?.id, streamHandler: defaultHandler, completion: defaultCompletionHandler )