Skip to content
Open

wip #198

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions TypeaheadAI.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
2B8CD4962B05FF59003E0589 /* ModalFooterView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8CD4952B05FF59003E0589 /* ModalFooterView.swift */; };
2B8CD4992B06DD36003E0589 /* Functions.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8CD4982B06DD36003E0589 /* Functions.swift */; };
2B8CD49B2B076AE6003E0589 /* ActivateOnboardingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8CD49A2B076AE6003E0589 /* ActivateOnboardingView.swift */; };
2B8CD49F2B084779003E0589 /* SmartClickActor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B8CD49E2B084779003E0589 /* SmartClickActor.swift */; };
2B92BDB92AA3A2DD00E65CFA /* CustomModalWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B92BDB82AA3A2DD00E65CFA /* CustomModalWindow.swift */; };
2B92BDBB2AA3D10800E65CFA /* ModalManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B92BDBA2AA3D10800E65CFA /* ModalManager.swift */; };
2BA3C2352AADAC5700537F95 /* llama in Frameworks */ = {isa = PBXBuildFile; productRef = 2BA3C2342AADAC5700537F95 /* llama */; };
Expand Down Expand Up @@ -170,6 +171,7 @@
2B8CD4952B05FF59003E0589 /* ModalFooterView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModalFooterView.swift; sourceTree = "<group>"; };
2B8CD4982B06DD36003E0589 /* Functions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Functions.swift; sourceTree = "<group>"; };
2B8CD49A2B076AE6003E0589 /* ActivateOnboardingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ActivateOnboardingView.swift; sourceTree = "<group>"; };
2B8CD49E2B084779003E0589 /* SmartClickActor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SmartClickActor.swift; sourceTree = "<group>"; };
2B92BDB82AA3A2DD00E65CFA /* CustomModalWindow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CustomModalWindow.swift; sourceTree = "<group>"; };
2B92BDBA2AA3D10800E65CFA /* ModalManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModalManager.swift; sourceTree = "<group>"; };
2BA3C2362AADAD9A00537F95 /* SpecialCopyActor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpecialCopyActor.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -280,6 +282,7 @@
2B3792302AB83739008D812F /* CanSimulatePaste.swift */,
2B0B300F2ACF8C8000338B76 /* SpecialOpenActor.swift */,
2B8CD4932B05D278003E0589 /* CanPerformOCR.swift */,
2B8CD49E2B084779003E0589 /* SmartClickActor.swift */,
);
path = Actors;
sourceTree = "<group>";
Expand Down Expand Up @@ -714,6 +717,7 @@
2B27450A2AB01CF400F37D3E /* SpecialSaveActor.swift in Sources */,
2B8CD4962B05FF59003E0589 /* ModalFooterView.swift in Sources */,
2B7D35842B01B14100E85AEF /* IntroOnboardingView.swift in Sources */,
2B8CD49F2B084779003E0589 /* SmartClickActor.swift in Sources */,
2B7D358A2B01ED5100E85AEF /* SmartCopyOnboardingView.swift in Sources */,
2BAFDB6B2AF60F64009C8370 /* MenuButtonView.swift in Sources */,
2B7BBAC22AF3347100E4CE1F /* NewQuickActionForm.swift in Sources */,
Expand Down
8 changes: 4 additions & 4 deletions TypeaheadAI/Actors/CanPerformOCR.swift
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ private struct Constants {
}

protocol CanPerformOCR {
func performOCR(image: CGImage) async throws -> (String, NSImage?)
func performOCR(image: CGImage, level: VNRequestTextRecognitionLevel) async throws -> (String, NSImage?)
}

extension CanPerformOCR {
func performOCR(image: CGImage) async throws -> (String, NSImage?) {
func performOCR(image: CGImage, level: VNRequestTextRecognitionLevel = .accurate) async throws -> (String, NSImage?) {
try await withCheckedThrowingContinuation { continuation in
let request = VNRecognizeTextRequest { (request, error) in
if let error = error {
Expand All @@ -39,7 +39,7 @@ extension CanPerformOCR {
continuation.resume(returning: (allRecognizedText, imageWithBoxes))
}

request.recognitionLevel = .accurate
request.recognitionLevel = level
request.automaticallyDetectsLanguage = true

let handler = VNImageRequestHandler(cgImage: image, options: [:])
Expand Down Expand Up @@ -140,7 +140,7 @@ extension CanPerformOCR {
bitsPerComponent: image.bitsPerComponent,
bytesPerRow: image.bytesPerRow,
space: image.colorSpace ?? CGColorSpace(name: CGColorSpace.sRGB)!,
bitmapInfo: image.bitmapInfo.rawValue
bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
) else {
return nil
}
Expand Down
98 changes: 98 additions & 0 deletions TypeaheadAI/Actors/SmartClickActor.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
//
// SmartClickActor.swift
// TypeaheadAI
//
// Created by Jeff Hara on 11/17/23.
//

import AppKit
import Foundation
import os.log

/// Drives the "smart click" flow: shows the modal at the mouse position,
/// captures the current selection and the on-screen text of the active app,
/// and fills the modal with suggested intents.
actor SmartClickActor: CanPerformOCR, CanSimulateCopy {
    private let intentManager: IntentManager
    private let clientManager: ClientManager
    private let promptManager: QuickActionManager
    private let modalManager: ModalManager
    private let appContextManager: AppContextManager

    private let logger = Logger(
        subsystem: "ai.typeahead.TypeaheadAI",
        category: "SmartClickActor"
    )

    init(
        intentManager: IntentManager,
        clientManager: ClientManager,
        promptManager: QuickActionManager,
        modalManager: ModalManager,
        appContextManager: AppContextManager
    ) {
        self.intentManager = intentManager
        self.clientManager = clientManager
        self.promptManager = promptManager
        self.modalManager = modalManager
        self.appContextManager = appContextManager
    }

    /// Runs one smart-click interaction.
    ///
    /// Steps, in order:
    /// 1. Fetch the active app context and show the modal at the recorded mouse position.
    /// 2. Simulate a copy and, if it succeeds, post the pasteboard string as a user message.
    /// 3. OCR the context screenshot (once) and store the text on the context.
    /// 4. Show locally stored contextual intents immediately.
    /// 5. Ask the backend for further intents in a background task and append them.
    ///
    /// - Throws: Errors from `getActiveAppInfo()` or `performOCR(image:)`.
    ///   Failures of the background intent request are logged rather than thrown,
    ///   because that work outlives this call.
    func smartClick() async throws {
        var appContext = try await self.appContextManager.getActiveAppInfo()
        await self.modalManager.forceRefresh()
        await self.modalManager.showModal(mousePos: appContext?.mousePos)
        await NSApp.activate(ignoringOtherApps: true)

        // NOTE: Experimental:
        // Pre-existing pasteboard contents (including Universal Clipboard) are
        // deliberately not read; only text copied by this gesture is used.
        let copiedText: String?

        // Attempt to copy whatever is currently selected.
        do {
            try await simulateCopy()
            copiedText = NSPasteboard.general.string(forType: .string)
        } catch {
            // Best-effort: if nothing could be copied, continue without copied text.
            copiedText = nil
        }

        // Set the copied text as a new message
        if let copiedText = copiedText {
            await self.modalManager.setUserMessage(copiedText, messageType: .string)
        }

        // OCR the screenshot exactly once; the result is reused by both the
        // local intent lookup and the backend request below.
        if let screenshot = appContext?.screenshotPath.flatMap({ NSImage(contentsOfFile: $0)?.toCGImage() }) {
            let (ocrText, _) = try await performOCR(image: screenshot)
            appContext?.ocrText = ocrText
        }

        // Try to predict the user intent
        let contextualIntents = self.intentManager.fetchContextualIntents(limit: 3, appContext: appContext)
        await self.modalManager.setUserIntents(intents: contextualIntents)

        // Hand the background task an immutable snapshot so it never touches
        // the mutable local `appContext` from a concurrently executing closure.
        let contextSnapshot = appContext

        // Kick off async
        Task {
            do {
                if let intents = try await self.clientManager.suggestIntents(
                    id: UUID(),
                    username: NSUserName(),
                    userFullName: NSFullUserName(),
                    userObjective: self.promptManager.getActivePrompt(),
                    userBio: UserDefaults.standard.string(forKey: "bio") ?? "",
                    userLang: Locale.preferredLanguages.first ?? "",
                    copiedText: copiedText,
                    messages: self.modalManager.messages,
                    history: [],
                    appContext: contextSnapshot,
                    incognitoMode: !self.modalManager.online
                ), !intents.intents.isEmpty {
                    await self.modalManager.appendUserIntents(intents: intents.intents)
                }
            } catch {
                // An unstructured Task would otherwise drop this error silently.
                self.logger.error("Failed to suggest intents: \(error.localizedDescription)")
            }
        }
    }
}
17 changes: 12 additions & 5 deletions TypeaheadAI/AppContextManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ struct AppContext: Codable {
let appName: String?
let bundleIdentifier: String?
let url: URL?
let mousePos: NSPoint
var screenshotPath: String? = nil
var ocrText: String? = nil
}
Expand All @@ -28,10 +29,12 @@ class AppContextManager {
)

func getActiveAppInfo() async throws -> AppContext? {
guard let activeApp = NSWorkspace.shared.frontmostApplication else {
guard let activeApp = NSWorkspace.shared.menuBarOwningApplication else {
return nil
}

let mousePos = NSEvent.mouseLocation

let appName = activeApp.localizedName
let bundleIdentifier = activeApp.bundleIdentifier
self.logger.info("active app: \(bundleIdentifier ?? "<unk>")")
Expand All @@ -49,17 +52,21 @@ class AppContextManager {
appName: appName,
bundleIdentifier: bundleIdentifier,
url: strippedUrl,
mousePos: mousePos,
screenshotPath: screenshotPath
)
}
} catch {
self.logger.error("Failed to execute script: \(error.localizedDescription)")
}

return AppContext(appName: appName, bundleIdentifier: bundleIdentifier, url: nil)
} else {
return AppContext(appName: appName, bundleIdentifier: bundleIdentifier, url: nil)
}

return AppContext(
appName: appName,
bundleIdentifier: bundleIdentifier,
url: nil,
mousePos: mousePos
)
}

private func stripQueryParameters(from url: URL) -> URL? {
Expand Down
34 changes: 33 additions & 1 deletion TypeaheadAI/AppDelegate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
// Created by Jeff Hara on 11/5/23.
//

import Cocoa
import CoreGraphics
import Foundation
import Supabase
import SwiftUI
import UserNotifications

class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCenterDelegate {
var eventTap: CFMachPort?

func application(_ application: NSApplication, open urls: [URL]) {
for url in urls {
// Handle the URL
Expand All @@ -33,10 +37,38 @@ class AppDelegate: NSObject, NSApplicationDelegate, UNUserNotificationCenterDele

UNUserNotificationCenter.current().delegate = self
NotificationCenter.default.post(name: .startOnboarding, object: nil)

let eventMask = (1 << CGEventType.rightMouseDown.rawValue)
guard let tap = CGEvent.tapCreate(
tap: .cgSessionEventTap,
place: .headInsertEventTap,
options: .defaultTap,
eventsOfInterest: CGEventMask(eventMask),
callback: { (proxy: CGEventTapProxy, type: CGEventType, event: CGEvent, refcon: UnsafeMutableRawPointer?) in
if type == .rightMouseDown && event.flags.contains(.maskCommand) {
// Suppress the right-click and publish smart-click event
NotificationCenter.default.post(name: .smartClick, object: nil)
return nil
}

return Unmanaged.passRetained(event)
},
userInfo: nil
) else {
print("Failed to create event tap")
exit(1)
}

eventTap = tap
let runLoopSource = CFMachPortCreateRunLoopSource(kCFAllocatorDefault, tap, 0)
CFRunLoopAddSource(CFRunLoopGetCurrent(), runLoopSource, .commonModes)
CGEvent.tapEnable(tap: tap, enable: true)
}

/// Called as the application is about to exit; switches off the event tap
/// stored in `eventTap`, if one was created.
func applicationWillTerminate(_ aNotification: Notification) {
    // Nothing to tear down when no tap was ever installed.
    guard let tap = eventTap else { return }
    CGEvent.tapEnable(tap: tap, enable: false)
}

// MARK: - UNUserNotificationCenterDelegate
Expand Down
24 changes: 21 additions & 3 deletions TypeaheadAI/AppState.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ final class AppState: ObservableObject {
private var specialCopyActor: SpecialCopyActor? = nil
private var specialSaveActor: SpecialSaveActor? = nil
private var specialOpenActor: SpecialOpenActor? = nil
private var smartClickActor: SmartClickActor? = nil

// Monitors
private let mouseEventMonitor = MouseEventMonitor()
Expand Down Expand Up @@ -104,6 +105,13 @@ final class AppState: ObservableObject {
modalManager: modalManager,
appContextManager: appContextManager
)
self.smartClickActor = SmartClickActor(
intentManager: intentManager,
clientManager: clientManager,
promptManager: promptManager,
modalManager: modalManager,
appContextManager: appContextManager
)

// Set lazy params
// TODO: Use a dependency injection framework or encapsulate these managers
Expand Down Expand Up @@ -197,9 +205,6 @@ final class AppState: ObservableObject {
mouseEventMonitor.onLeftMouseDown = { [weak self] in
self?.mouseEventMonitor.mouseClicked = true

// If the toast window is open and the user clicks out,
// we can close the window.
// NOTE: If the user has chatted, then keep it open.
if let window = self?.modalManager.toastWindow,
(self?.modalManager.messages.count ?? 0) < 2 {
let mouseLocation = NSEvent.mouseLocation
Expand All @@ -215,6 +220,19 @@ final class AppState: ObservableObject {

appVersion = getAppVersion()
startCheckingForUpdates()

NotificationCenter.default.addObserver(
self,
selector: #selector(self.smartClickWrapper(_:)),
name: .smartClick,
object: nil
)
}

/// Selector target for the `.smartClick` notification: bridges the
/// Objective-C style callback into async code by starting a task that runs
/// the smart-click actor.
///
/// - Parameter notification: The posted notification; not inspected here.
@objc private func smartClickWrapper(_ notification: NSNotification) {
// The task handle is not stored, so an error thrown by `smartClick()` is
// not observed here. If the actor is nil the optional chain does nothing.
Task {
try await smartClickActor?.smartClick()
}
}

deinit {
Expand Down
6 changes: 3 additions & 3 deletions TypeaheadAI/ClientManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class ClientManager: ObservableObject {
userObjective: String?,
userBio: String,
userLang: String,
copiedText: String,
copiedText: String?,
messages: [Message],
history: [Message]?,
appContext: AppContext?,
Expand Down Expand Up @@ -268,7 +268,7 @@ class ClientManager: ObservableObject {
userObjective: String?,
userBio: String,
userLang: String,
copiedText: String,
copiedText: String?,
messages: [Message],
history: [Message]?,
appContext: AppContext?,
Expand Down Expand Up @@ -573,7 +573,7 @@ struct RequestPayload: Codable {
var userObjective: String?
var userBio: String
var userLang: String
var copiedText: String
var copiedText: String?
var messages: [Message]?
var history: [Message]?
var appContext: AppContext?
Expand Down
2 changes: 1 addition & 1 deletion TypeaheadAI/CrudManagers/IntentManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class IntentManager {
@MainActor
func addIntentEntry(
prompt: String,
copiedText: String,
copiedText: String?,
appContext: AppContext?
) {
let newEntry = IntentEntry(context: context)
Expand Down
4 changes: 3 additions & 1 deletion TypeaheadAI/Monitors/MouseClickMonitor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class MouseEventMonitor {
func startMonitoring() {
logger.debug("Starting to monitor mouse clicks.")
mouseEventMonitor = NSEvent.addGlobalMonitorForEvents(
matching: [.leftMouseDown, .leftMouseUp],
matching: [.leftMouseDown, .leftMouseUp, .rightMouseDown],
handler: { [weak self] event in
switch event.type {
case .leftMouseDown:
Expand All @@ -38,6 +38,8 @@ class MouseEventMonitor {
self?.mouseDragged = true
}
}
case .rightMouseDown:
print("detected")
default:
break
}
Expand Down
2 changes: 2 additions & 0 deletions TypeaheadAI/Notification+Extension.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import Foundation

extension Notification.Name {
static let smartClick = Notification.Name("smartClick")

static let smartCopyPerformed = Notification.Name("smartCopyPerformed")

static let startOnboarding = Notification.Name("startOnboarding")
Expand Down
Loading