From 4d1a89aae59651a00b1c787d6868717455825c1f Mon Sep 17 00:00:00 2001 From: shavit Date: Thu, 22 Feb 2024 13:14:09 -0500 Subject: [PATCH 01/21] Update server health endpoint --- mac/FreeChat/Models/NPC/ServerHealth.swift | 11 +++-------- mac/FreeChat/Views/Settings/AISettingsView.swift | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/mac/FreeChat/Models/NPC/ServerHealth.swift b/mac/FreeChat/Models/NPC/ServerHealth.swift index 1c294e9..0d06ae2 100644 --- a/mac/FreeChat/Models/NPC/ServerHealth.swift +++ b/mac/FreeChat/Models/NPC/ServerHealth.swift @@ -15,16 +15,11 @@ fileprivate struct ServerHealthRequest { let config = URLSessionConfiguration.default config.timeoutIntervalForRequest = 3 config.timeoutIntervalForResource = 1 - let (data, response) = try await URLSession(configuration: config).data(from: url) - guard let responseCode = (response as? HTTPURLResponse)?.statusCode, - responseCode > 0 + let (_, response) = try await URLSession(configuration: config).data(from: url) + guard let code = (response as? HTTPURLResponse)?.statusCode, code > 0 else { throw ServerHealthError.invalidResponse } - guard let json = try JSONSerialization.jsonObject(with: data, options: []) as? [String: Any], - let jsonStatus: String = json["status"] as? String - else { throw ServerHealthError.invalidResponse } - - return responseCode == 200 && jsonStatus == "ok" + return code == 200 } } diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift index 15e9676..17c4e43 100644 --- a/mac/FreeChat/Views/Settings/AISettingsView.swift +++ b/mac/FreeChat/Views/Settings/AISettingsView.swift @@ -401,7 +401,7 @@ struct AISettingsView: View { private func updateRemoteServerURL() { let scheme = inputServerTLS ? "https" : "http" - guard let url = URL(string: "\(scheme)://\(inputServerHost):\(inputServerPort)/health") + guard let url = URL(string: "\(scheme)://\(inputServerHost):\(inputServerPort)") else { return } Task { await ServerHealth.shared.updateURL(url) From 9f9c94d918f99af47a01641757d48b72aec2a4cf Mon Sep 17 00:00:00 2001 From: shavit Date: Fri, 23 Feb 2024 12:09:11 -0500 Subject: [PATCH 02/21] Add completion method with AsyncStream --- mac/FreeChat/Models/NPC/LlamaServer.swift | 95 ++++++++++++++++++----- 1 file changed, 74 insertions(+), 21 deletions(-) diff --git a/mac/FreeChat/Models/NPC/LlamaServer.swift b/mac/FreeChat/Models/NPC/LlamaServer.swift index eafd5ed..d4336b0 100644 --- a/mac/FreeChat/Models/NPC/LlamaServer.swift +++ b/mac/FreeChat/Models/NPC/LlamaServer.swift @@ -146,6 +146,7 @@ actor LlamaServer { } } + @available(*, deprecated, message: "use complete(prompt:stop:temperature) instead") func complete( prompt: String, stop: [String]?, temperature: Double?, progressHandler: (@Sendable (String) -> Void)? = nil @@ -157,27 +158,9 @@ actor LlamaServer { let start = CFAbsoluteTimeGetCurrent() try await startServer() - // hit localhost for completion - var params = CompleteParams( - prompt: prompt, - stop: stop ?? 
[
-        "</s>",
-        "\n\(Message.USER_SPEAKER_ID):",
-        "\n\(Message.USER_SPEAKER_ID.lowercased()):",
-        "[/INST]",
-        "[INST]",
-        "USER:",
-      ]
-    )
-    if let t = temperature { params.temperature = t }
-
-    var request = URLRequest(url: url("/completion"))
-
-    request.httpMethod = "POST"
-    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
-    request.setValue("text/event-stream", forHTTPHeaderField: "Accept")
-    request.setValue("keep-alive", forHTTPHeaderField: "Connection")
-    request.httpBody = params.toJSON().data(using: .utf8)
+    // hit server for completion
+    let params = CompleteParams(prompt: prompt, stop: stop, temperature: temperature)
+    let request = buildRequest(path: "/completion", completeParams: params)
 
     // Use EventSource to receive server sent events
     eventSource = EventSource(request: request)
@@ -249,6 +232,63 @@ actor LlamaServer {
     )
   }
 
+  func complete(prompt: String, stop: [String]?, temperature: Double?)
+    throws -> AsyncStream<String> {
+    let params = CompleteParams(prompt: prompt, stop: stop, temperature: temperature)
+    let request = buildRequest(path: "/completion", completeParams: params)
+
+    return AsyncStream { continuation in
+      Task.detached {
+        let eventSource = EventSource(request: request)
+        eventSource.connect()
+
+        for await event in eventSource.events {
+          if await self.interrupted {
+            await eventSource.close()
+            continuation.finish()
+          }
+          switch event {
+          case .open: continue
+          case .error(let error):
+            print("llama.cpp EventSource server error:", error.localizedDescription)
+            await eventSource.close()
+          case .message(let message):
+            if let response = try await self.decodeCompletionMessage(message: message.data) {
+              continuation.yield(response.content)
+              if response.stop {
+                await eventSource.close()
+                continuation.finish()
+              }
+            }
+          case .closed:
+            await eventSource.close()
+            continuation.finish()
+            print("llama.cpp EventSource closed")
+          }
+        }
+        await eventSource.close()
+        continuation.finish()
+      }
+    }
+  }
+
+  private func buildRequest(path: String, completeParams: CompleteParams) -> URLRequest {
+    var request = URLRequest(url: self.url(path))
+    request.httpMethod = "POST"
+    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
+    request.setValue("text/event-stream", forHTTPHeaderField: "Accept")
+    request.setValue("keep-alive", forHTTPHeaderField: "Connection")
+    request.httpBody = completeParams.toJSON().data(using: .utf8)
+
+    return request
+  }
+
+  private func decodeCompletionMessage(message: String?) throws -> Response? {
+    guard let data = message?.data(using: .utf8) else { return nil }
+    let decoder = JSONDecoder()
+    return try decoder.decode(Response.self, from: data)
+  }
+
   func interrupt() async {
     if let eventSource, eventSource.readyState != .closed {
       await eventSource.close()
@@ -317,6 +357,19 @@ actor LlamaServer {
     var mirostat_eta = 0.1 // learning rate
     var cache_prompt = true
 
+    init(prompt: String, stop: [String]?, temperature: Double?) {
+      self.prompt = prompt
+      self.stop = stop ?? [
+        "</s>",
+        "\n\(Message.USER_SPEAKER_ID):",
+        "\n\(Message.USER_SPEAKER_ID.lowercased()):",
+        "[/INST]",
+        "[INST]",
+        "USER:",
+      ]
+      self.temperature = temperature ?? 
Agent.DEFAULT_TEMP + } + func toJSON() -> String { let encoder = JSONEncoder() encoder.outputFormatting = .prettyPrinted From cf295845a748976dec54317890d6837fc4f73c87 Mon Sep 17 00:00:00 2001 From: shavit Date: Fri, 23 Feb 2024 14:22:11 -0500 Subject: [PATCH 03/21] Add Ollama backend --- mac/FreeChat/Models/NPC/Agent.swift | 15 +++ mac/FreeChat/Models/NPC/OllamaBackend.swift | 113 ++++++++++++++++++ .../ConversationView/ConversationView.swift | 1 + 3 files changed, 129 insertions(+) create mode 100644 mac/FreeChat/Models/NPC/OllamaBackend.swift diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index 79e974d..745fbd6 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -19,8 +19,10 @@ class Agent: ObservableObject { @Published var pendingMessage = "" @Published var status: Status = .cold + @available(*, deprecated, message: "use backend with a common interface") // each agent runs their own server var llama: LlamaServer + var backend: OllamaBackend? init(id: String, prompt: String, systemPrompt: String, modelPath: String, contextLength: Int) { self.id = id @@ -41,6 +43,8 @@ class Agent: ObservableObject { status = .processing } + // TODO: Uncomment this block + /* prompt = template.run(systemPrompt: systemPrompt, messages: messages) pendingMessage = "" @@ -57,6 +61,16 @@ class Agent: ObservableObject { status = .ready return response + */ + + pendingMessage = "" + for try await partialResponse in try await backend!.complete(messages: messages) { + self.pendingMessage += partialResponse + self.prompt = pendingMessage + } + status = .ready + + return LlamaServer.CompleteResponse(text: pendingMessage, responseStartSeconds: 0) } func handleCompletionProgress(partialResponse: String) { @@ -67,6 +81,7 @@ class Agent: ObservableObject { func interrupt() async { if status != .processing, status != .coldProcessing { return } await llama.interrupt() + await backend?.interrupt() } func warmup() async throws { diff --git a/mac/FreeChat/Models/NPC/OllamaBackend.swift b/mac/FreeChat/Models/NPC/OllamaBackend.swift new file mode 100644 index 0000000..67ea8ed --- /dev/null +++ b/mac/FreeChat/Models/NPC/OllamaBackend.swift @@ -0,0 +1,113 @@ +// +// OllamaBackend.swift +// FreeChat +// + +import Foundation +import EventSource + +actor OllamaBackend { + + struct RoleMessage: Codable { + let role: String + let content: String + } + + struct CompleteParams: Encodable { + let messages: [RoleMessage] + let model: String + let stream = true + + func toJSON() -> String { + let encoder = JSONEncoder() + let jsonData = try? encoder.encode(self) + return String(data: jsonData!, encoding: .utf8)! + } + } + + struct Response: Decodable { + struct Choice: Decodable { + let index: Int + let delta: RoleMessage + let finishReason: String? + } + let id: String + let object: String + let created: Int + let model: String + let systemFingerprint: String + let choices: [Choice] + } + + private var interrupted = false + + private let contextLength: Int + private let scheme: String + private let host: String + private let port: String + + init(contextLength: Int, tls: Bool, host: String, port: String) { + self.contextLength = contextLength + self.scheme = tls ? 
"https" : "http" + self.host = host + self.port = port + } + + func complete(messages: [String]) throws -> AsyncStream { + let messages = [RoleMessage(role: "system", content: "you know")] + + messages.map({ RoleMessage(role: "user", content: $0) }) + let params = CompleteParams(messages: messages, model: "orca-mini") + let url = URL(string: "\(scheme)://\(host):\(port)/v1/chat/completions")! + let request = buildRequest(url: url, params: params) + + return AsyncStream { continuation in + Task.detached { + let eventSource = EventSource(request: request) + eventSource.connect() + + L: for await event in eventSource.events { + guard await !self.interrupted else { break L } + switch event { + case .open: continue + case .error(let error): + print("ollama EventSource server error:", error.localizedDescription) + break L + case .message(let message): + if let response = try await self.decodeCompletionMessage(message: message.data), + let choice = response.choices.first { + continuation.yield(choice.delta.content) + if choice.finishReason == "stop" { break L } + } + case .closed: + print("ollama EventSource closed") + break L + } + } + + await eventSource.close() + continuation.finish() + } + } + } + + func interrupt() async { interrupted = true } + + func buildRequest(url: URL, params: CompleteParams) -> URLRequest { + var request = URLRequest(url: url) + request.httpMethod = "POST" + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.setValue("text/event-stream", forHTTPHeaderField: "Accept") + request.setValue("keep-alive", forHTTPHeaderField: "Connection") + request.setValue("Bearer: none", forHTTPHeaderField: "Authorization") + request.httpBody = params.toJSON().data(using: .utf8) + + return request + } + + private func decodeCompletionMessage(message: String?) throws -> Response? 
{ + guard let data = message?.data(using: .utf8) else { return nil } + let decoder = JSONDecoder() + decoder.keyDecodingStrategy = .convertFromSnakeCase + return try decoder.decode(Response.self, from: data) + } +} diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index 08d6d29..644bb73 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -179,6 +179,7 @@ struct ConversationView: View, Sendable { else { return } await agent.llama.stopServer() agent.llama = LlamaServer(contextLength: contextLength, tls: tls, host: host, port: port) + agent.backend = OllamaBackend(contextLength: contextLength, tls: tls, host: host, port: port) } private func scrollToLastIfRecent(_ proxy: ScrollViewProxy) { From 7efff5713c835e5a890991d9b9f3bda02601fd88 Mon Sep 17 00:00:00 2001 From: shavit Date: Fri, 23 Feb 2024 14:22:23 -0500 Subject: [PATCH 04/21] Break stream for any finish reason --- mac/FreeChat/Models/NPC/OllamaBackend.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mac/FreeChat/Models/NPC/OllamaBackend.swift b/mac/FreeChat/Models/NPC/OllamaBackend.swift index 67ea8ed..379aac7 100644 --- a/mac/FreeChat/Models/NPC/OllamaBackend.swift +++ b/mac/FreeChat/Models/NPC/OllamaBackend.swift @@ -76,7 +76,7 @@ actor OllamaBackend { if let response = try await self.decodeCompletionMessage(message: message.data), let choice = response.choices.first { continuation.yield(choice.delta.content) - if choice.finishReason == "stop" { break L } + if choice.finishReason != nil { break L } } case .closed: print("ollama EventSource closed") From 1fdcd2666b9f16d492f66628194197254c044153 Mon Sep 17 00:00:00 2001 From: shavit Date: Fri, 23 Feb 2024 14:30:51 -0500 Subject: [PATCH 05/21] Fetch Ollama model list --- mac/FreeChat.xcodeproj/project.pbxproj | 4 ++ mac/FreeChat/Models/NPC/LlamaServer.swift | 32 ++++++-------- mac/FreeChat/Models/NPC/OllamaBackend.swift | 49 ++++++++++++++++++--- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/mac/FreeChat.xcodeproj/project.pbxproj b/mac/FreeChat.xcodeproj/project.pbxproj index 79d473c..7c19ac4 100644 --- a/mac/FreeChat.xcodeproj/project.pbxproj +++ b/mac/FreeChat.xcodeproj/project.pbxproj @@ -58,6 +58,7 @@ A1F617562A782E4F00F2048C /* ConversationView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1F617552A782E4F00F2048C /* ConversationView.swift */; }; A1F617582A7836AE00F2048C /* Message+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1F617572A7836AE00F2048C /* Message+Extensions.swift */; }; A1F6175B2A7838F700F2048C /* Conversation+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1F6175A2A7838F700F2048C /* Conversation+Extensions.swift */; }; + DE16617B2B8A40D100826556 /* OllamaBackend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE16617A2B8A40D100826556 /* OllamaBackend.swift */; }; DEA8CF572B51938B007A4CE7 /* FreeChatAppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA8CF562B51938B007A4CE7 /* FreeChatAppDelegate.swift */; }; DEEA39CC2B586F3800992592 /* ServerHealth.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEEA39CB2B586F3800992592 /* ServerHealth.swift */; }; /* End PBXBuildFile section */ @@ -176,6 +177,7 @@ A1F617552A782E4F00F2048C /* ConversationView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConversationView.swift; sourceTree = ""; }; A1F617572A7836AE00F2048C /* 
Message+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Message+Extensions.swift"; sourceTree = ""; }; A1F6175A2A7838F700F2048C /* Conversation+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Conversation+Extensions.swift"; sourceTree = ""; }; + DE16617A2B8A40D100826556 /* OllamaBackend.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = OllamaBackend.swift; path = FreeChat/Models/NPC/OllamaBackend.swift; sourceTree = SOURCE_ROOT; }; DEA8CF562B51938B007A4CE7 /* FreeChatAppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FreeChatAppDelegate.swift; sourceTree = ""; }; DEEA39CB2B586F3800992592 /* ServerHealth.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ServerHealth.swift; sourceTree = ""; }; /* End PBXFileReference section */ @@ -259,6 +261,7 @@ A137A3872AB502DB00BE1AE0 /* ggml-metal.metal */, A17A2E122A79A005006CDD90 /* Agent.swift */, A17A2E132A79A005006CDD90 /* LlamaServer.swift */, + DE16617A2B8A40D100826556 /* OllamaBackend.swift */, DEEA39CB2B586F3800992592 /* ServerHealth.swift */, A137A3822AB4FD4800BE1AE0 /* freechat-server */, A1A286F32A7E17750004967A /* server-watchdog */, @@ -577,6 +580,7 @@ A1F617582A7836AE00F2048C /* Message+Extensions.swift in Sources */, A13C8C682A902A1200EC18D8 /* CGKeycode+Extensions.swift in Sources */, A15D50D22A7F539800FC1681 /* NavList.swift in Sources */, + DE16617B2B8A40D100826556 /* OllamaBackend.swift in Sources */, A1156D342AD1F5EF00081313 /* Templates.swift in Sources */, A1D4B49D2B9A780B00B9C4BE /* AgentDefaults.swift in Sources */, A1F617262A782AA100F2048C /* FreeChat.swift in Sources */, diff --git a/mac/FreeChat/Models/NPC/LlamaServer.swift b/mac/FreeChat/Models/NPC/LlamaServer.swift index d4336b0..bf419f8 100644 --- a/mac/FreeChat/Models/NPC/LlamaServer.swift +++ b/mac/FreeChat/Models/NPC/LlamaServer.swift @@ -242,30 +242,24 @@ actor LlamaServer { let eventSource = EventSource(request: request) eventSource.connect() - for await event in eventSource.events { - if await self.interrupted { - await eventSource.close() - continuation.finish() - } + L: for await event in eventSource.events { + guard await !self.interrupted else { break L } switch event { case .open: continue case .error(let error): print("llama.cpp EventSource server error:", error.localizedDescription) - await eventSource.close() + break L case .message(let message): - if let response = try await self.decodeCompletionMessage(message: message.data) { + if let response = try Response.from(data: message.data?.data(using: .utf8)) { continuation.yield(response.content) - if response.stop { - await eventSource.close() - continuation.finish() - } + if response.stop { break L } } case .closed: - await eventSource.close() - continuation.finish() print("llama.cpp EventSource closed") + break L } } + await eventSource.close() continuation.finish() } @@ -283,12 +277,6 @@ actor LlamaServer { return request } - private func decodeCompletionMessage(message: String?) throws -> Response? 
{ - guard let data = message?.data(using: .utf8) else { return nil } - let decoder = JSONDecoder() - return try decoder.decode(Response.self, from: data) - } - func interrupt() async { if let eventSource, eventSource.readyState != .closed { await eventSource.close() @@ -393,6 +381,12 @@ actor LlamaServer { struct Response: Codable { let content: String let stop: Bool + + static func from(data: Data?) throws -> Response? { + guard let data else { return nil } + let decoder = JSONDecoder() + return try decoder.decode(Response.self, from: data) + } } struct StopResponse: Codable { diff --git a/mac/FreeChat/Models/NPC/OllamaBackend.swift b/mac/FreeChat/Models/NPC/OllamaBackend.swift index 379aac7..60473bf 100644 --- a/mac/FreeChat/Models/NPC/OllamaBackend.swift +++ b/mac/FreeChat/Models/NPC/OllamaBackend.swift @@ -37,6 +37,13 @@ actor OllamaBackend { let model: String let systemFingerprint: String let choices: [Choice] + + static func from(data: Data?) throws -> Response? { + guard let data else { return nil } + let decoder = JSONDecoder() + decoder.keyDecodingStrategy = .convertFromSnakeCase + return try decoder.decode(Response.self, from: data) + } } private var interrupted = false @@ -73,8 +80,8 @@ actor OllamaBackend { print("ollama EventSource server error:", error.localizedDescription) break L case .message(let message): - if let response = try await self.decodeCompletionMessage(message: message.data), - let choice = response.choices.first { + if let response = try Response.from(data: message.data?.data(using: .utf8)), + let choice = response.choices.first { continuation.yield(choice.delta.content) if choice.finishReason != nil { break L } } @@ -104,10 +111,38 @@ actor OllamaBackend { return request } - private func decodeCompletionMessage(message: String?) throws -> Response? { - guard let data = message?.data(using: .utf8) else { return nil } - let decoder = JSONDecoder() - decoder.keyDecodingStrategy = .convertFromSnakeCase - return try decoder.decode(Response.self, from: data) + // MARK: - List models + + struct TagsResponse: Decodable { + struct Model: Decodable { + struct Details: Decodable { + let parentModel: String? + let format: String + let family: String + let families: [String]? + let parameterSize: String + let quantizationLevel: String + } + let name: String + let model: String + let modifiedAt: String + let size: Int + let digest: String + let details: Details + } + let models: [Model] + + static func from(data: Data) throws -> TagsResponse { + let decoder = JSONDecoder() + decoder.keyDecodingStrategy = .convertFromSnakeCase + return try decoder.decode(TagsResponse.self, from: data) + } + } + + nonisolated func fetchModels() async throws -> TagsResponse { + // TODO: Replace force-unwrap + let url = URL(string: "\(scheme)://\(host):\(port)/api/tags")! 
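+    // Ollama lists locally installed models at GET /api/tags; the JSON
+    // payload is decoded below with snake_case keys converted to camelCase.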
+ let (data, _) = try await URLSession.shared.data(from: url) + return try TagsResponse.from(data: data) } } From 56cf53e1cc2a6dfd9005acfda877b4745c91d9e5 Mon Sep 17 00:00:00 2001 From: shavit Date: Sun, 25 Feb 2024 12:59:09 -0500 Subject: [PATCH 06/21] Add Ollama optional parameters --- mac/FreeChat/Models/NPC/OllamaBackend.swift | 42 ++++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/mac/FreeChat/Models/NPC/OllamaBackend.swift b/mac/FreeChat/Models/NPC/OllamaBackend.swift index 60473bf..d30786d 100644 --- a/mac/FreeChat/Models/NPC/OllamaBackend.swift +++ b/mac/FreeChat/Models/NPC/OllamaBackend.swift @@ -14,12 +14,40 @@ actor OllamaBackend { } struct CompleteParams: Encodable { + struct OllamaOptions: Encodable { + enum Mirostat: Int, Encodable { + case disabled = 0 + case v1 = 1 + case v2 = 2 + } + let mirostat: Mirostat + let mirostatETA: Float = 0.1 + let mirostatTAU: Float = 5 + let numCTX = 2048 + let numGQA = 1 + let numGPU: Int? = nil + let numThread: Int? = nil + let repeatLastN = 64 + let repeatPenalty: Float = 1.1 + let temperature: Float = 0.7 + let seed: Int? = nil + let stop: String? = nil + let tfsZ: Float? = nil + let numPredict = 128 + let topK = 40 + let topP: Float = 0.9 + } let messages: [RoleMessage] let model: String + let format: String? = nil + let options: OllamaOptions? = nil + let template: String? = nil let stream = true + let keepAlive = true func toJSON() -> String { let encoder = JSONEncoder() + encoder.keyEncodingStrategy = .convertToSnakeCase let jsonData = try? encoder.encode(self) return String(data: jsonData!, encoding: .utf8)! } @@ -49,23 +77,20 @@ actor OllamaBackend { private var interrupted = false private let contextLength: Int - private let scheme: String - private let host: String - private let port: String + private let baseURL: URL init(contextLength: Int, tls: Bool, host: String, port: String) { self.contextLength = contextLength - self.scheme = tls ? "https" : "http" - self.host = host - self.port = port + self.baseURL = URL(string: "\(tls ? "https" : "http")://\(host):\(port)")! } func complete(messages: [String]) throws -> AsyncStream { let messages = [RoleMessage(role: "system", content: "you know")] + messages.map({ RoleMessage(role: "user", content: $0) }) let params = CompleteParams(messages: messages, model: "orca-mini") - let url = URL(string: "\(scheme)://\(host):\(port)/v1/chat/completions")! + let url = baseURL.appendingPathComponent("/v1/chat/completions") let request = buildRequest(url: url, params: params) + interrupted = false return AsyncStream { continuation in Task.detached { @@ -140,8 +165,7 @@ actor OllamaBackend { } nonisolated func fetchModels() async throws -> TagsResponse { - // TODO: Replace force-unwrap - let url = URL(string: "\(scheme)://\(host):\(port)/api/tags")! 
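+    // baseURL is built and validated once in init, so the per-call force-unwrap goes away.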
+ let url = baseURL.appendingPathComponent("/api/tags") let (data, _) = try await URLSession.shared.data(from: url) return try TagsResponse.from(data: data) } From e88b52ee9ab5426c5c7b6852663eeed5ca497ee5 Mon Sep 17 00:00:00 2001 From: shavit Date: Sun, 3 Mar 2024 15:51:00 -0500 Subject: [PATCH 07/21] Use OpenAI backend for remote and local server * Add backend types with default URLs * Use llama to only run the local instance * Make submit(input:) async --- mac/FreeChat.xcodeproj/project.pbxproj | 8 +- .../xcshareddata/swiftpm/Package.resolved | 22 +- mac/FreeChat/Models/NPC/Agent.swift | 41 +-- mac/FreeChat/Models/NPC/LlamaServer.swift | 250 +----------------- ...llamaBackend.swift => OpenAIBackend.swift} | 40 ++- .../ConversationView/ConversationView.swift | 111 ++++---- 6 files changed, 107 insertions(+), 365 deletions(-) rename mac/FreeChat/Models/NPC/{OllamaBackend.swift => OpenAIBackend.swift} (81%) diff --git a/mac/FreeChat.xcodeproj/project.pbxproj b/mac/FreeChat.xcodeproj/project.pbxproj index 7c19ac4..dc44399 100644 --- a/mac/FreeChat.xcodeproj/project.pbxproj +++ b/mac/FreeChat.xcodeproj/project.pbxproj @@ -58,7 +58,7 @@ A1F617562A782E4F00F2048C /* ConversationView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1F617552A782E4F00F2048C /* ConversationView.swift */; }; A1F617582A7836AE00F2048C /* Message+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1F617572A7836AE00F2048C /* Message+Extensions.swift */; }; A1F6175B2A7838F700F2048C /* Conversation+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1F6175A2A7838F700F2048C /* Conversation+Extensions.swift */; }; - DE16617B2B8A40D100826556 /* OllamaBackend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE16617A2B8A40D100826556 /* OllamaBackend.swift */; }; + DE16617B2B8A40D100826556 /* OpenAIBackend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE16617A2B8A40D100826556 /* OpenAIBackend.swift */; }; DEA8CF572B51938B007A4CE7 /* FreeChatAppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA8CF562B51938B007A4CE7 /* FreeChatAppDelegate.swift */; }; DEEA39CC2B586F3800992592 /* ServerHealth.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEEA39CB2B586F3800992592 /* ServerHealth.swift */; }; /* End PBXBuildFile section */ @@ -177,7 +177,7 @@ A1F617552A782E4F00F2048C /* ConversationView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConversationView.swift; sourceTree = ""; }; A1F617572A7836AE00F2048C /* Message+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Message+Extensions.swift"; sourceTree = ""; }; A1F6175A2A7838F700F2048C /* Conversation+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Conversation+Extensions.swift"; sourceTree = ""; }; - DE16617A2B8A40D100826556 /* OllamaBackend.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = OllamaBackend.swift; path = FreeChat/Models/NPC/OllamaBackend.swift; sourceTree = SOURCE_ROOT; }; + DE16617A2B8A40D100826556 /* OpenAIBackend.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = OpenAIBackend.swift; path = FreeChat/Models/NPC/OpenAIBackend.swift; sourceTree = SOURCE_ROOT; }; DEA8CF562B51938B007A4CE7 /* FreeChatAppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FreeChatAppDelegate.swift; sourceTree = ""; }; DEEA39CB2B586F3800992592 /* 
ServerHealth.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ServerHealth.swift; sourceTree = ""; }; /* End PBXFileReference section */ @@ -261,7 +261,7 @@ A137A3872AB502DB00BE1AE0 /* ggml-metal.metal */, A17A2E122A79A005006CDD90 /* Agent.swift */, A17A2E132A79A005006CDD90 /* LlamaServer.swift */, - DE16617A2B8A40D100826556 /* OllamaBackend.swift */, + DE16617A2B8A40D100826556 /* OpenAIBackend.swift */, DEEA39CB2B586F3800992592 /* ServerHealth.swift */, A137A3822AB4FD4800BE1AE0 /* freechat-server */, A1A286F32A7E17750004967A /* server-watchdog */, @@ -580,7 +580,7 @@ A1F617582A7836AE00F2048C /* Message+Extensions.swift in Sources */, A13C8C682A902A1200EC18D8 /* CGKeycode+Extensions.swift in Sources */, A15D50D22A7F539800FC1681 /* NavList.swift in Sources */, - DE16617B2B8A40D100826556 /* OllamaBackend.swift in Sources */, + DE16617B2B8A40D100826556 /* OpenAIBackend.swift in Sources */, A1156D342AD1F5EF00081313 /* Templates.swift in Sources */, A1D4B49D2B9A780B00B9C4BE /* AgentDefaults.swift in Sources */, A1F617262A782AA100F2048C /* FreeChat.swift in Sources */, diff --git a/mac/FreeChat.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/mac/FreeChat.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 985f0f9..3d62681 100644 --- a/mac/FreeChat.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/mac/FreeChat.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -5,8 +5,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/Recouse/EventSource.git", "state" : { - "revision" : "fcd7152a3106d75287c7303bba40a4761e5b7f6d", - "version" : "0.0.5" + "revision" : "ffaa978620b19c891d107941c1b36d18836e8ecb", + "version" : "0.0.7" } }, { @@ -27,24 +27,6 @@ "version" : "0.16.0" } }, - { - "identity" : "swift-async-algorithms", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-async-algorithms.git", - "state" : { - "revision" : "9cfed92b026c524674ed869a4ff2dcfdeedf8a2a", - "version" : "0.1.0" - } - }, - { - "identity" : "swift-collections", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-collections.git", - "state" : { - "revision" : "937e904258d22af6e447a0b72c0bc67583ef64a2", - "version" : "1.0.4" - } - }, { "identity" : "swift-markdown-ui", "kind" : "remoteSourceControl", diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index 745fbd6..2f29e3c 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -19,10 +19,9 @@ class Agent: ObservableObject { @Published var pendingMessage = "" @Published var status: Status = .cold - @available(*, deprecated, message: "use backend with a common interface") // each agent runs their own server var llama: LlamaServer - var backend: OllamaBackend? + private var backend: OpenAIBackend! 
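+  // Implicitly unwrapped: createBackend(contextLength:tls:host:port:) must run before the first completion.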
init(id: String, prompt: String, systemPrompt: String, modelPath: String, contextLength: Int) { self.id = id @@ -31,38 +30,17 @@ class Agent: ObservableObject { llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) } + func createBackend(contextLength: Int, tls: Bool, host: String, port: String) { + self.backend = OpenAIBackend(backend: .ollama, contextLength: contextLength, tls: tls, host: host, port: port) + } + // this is the main loop of the agent // listen -> respond -> update mental model and save checkpoint // we respond before updating to avoid a long delay after user input func listenThinkRespond( speakerId: String, messages: [String], template: Template, temperature: Double? - ) async throws -> LlamaServer.CompleteResponse { - if status == .cold { - status = .coldProcessing - } else { - status = .processing - } - - // TODO: Uncomment this block - /* - prompt = template.run(systemPrompt: systemPrompt, messages: messages) - - pendingMessage = "" - - let response = try await llama.complete( - prompt: prompt, stop: template.stopWords, temperature: temperature - ) { partialResponse in - DispatchQueue.main.async { - self.handleCompletionProgress(partialResponse: partialResponse) - } - } - - pendingMessage = response.text - status = .ready - - return response - */ - + ) async throws -> OpenAIBackend.ResponseSummary { + status = status == .cold ? .coldProcessing : .processing pendingMessage = "" for try await partialResponse in try await backend!.complete(messages: messages) { self.pendingMessage += partialResponse @@ -70,7 +48,7 @@ class Agent: ObservableObject { } status = .ready - return LlamaServer.CompleteResponse(text: pendingMessage, responseStartSeconds: 0) + return OpenAIBackend.ResponseSummary(text: pendingMessage, responseStartSeconds: 0) } func handleCompletionProgress(partialResponse: String) { @@ -86,11 +64,14 @@ class Agent: ObservableObject { func warmup() async throws { if prompt.isEmpty, systemPrompt.isEmpty { return } + // TODO: Implement this part + /* do { _ = try await llama.complete(prompt: prompt, stop: nil, temperature: nil) status = .ready } catch { status = .cold } + */ } } diff --git a/mac/FreeChat/Models/NPC/LlamaServer.swift b/mac/FreeChat/Models/NPC/LlamaServer.swift index bf419f8..325839b 100644 --- a/mac/FreeChat/Models/NPC/LlamaServer.swift +++ b/mac/FreeChat/Models/NPC/LlamaServer.swift @@ -36,30 +36,14 @@ actor LlamaServer { private var serverUp = false private var serverErrorMessage = "" private var eventSource: EventSource? - private let host: String - private let port: String - private let scheme: String - private var interrupted = false + private let url: URL private var monitor = Process() init(modelPath: String, contextLength: Int) { self.modelPath = modelPath self.contextLength = contextLength - self.scheme = "http" - self.host = "127.0.0.1" - self.port = "8690" - } - - init(contextLength: Int, tls: Bool, host: String, port: String) { - self.contextLength = contextLength - self.scheme = tls ? "https" : "http" - self.host = host - self.port = port - } - - private func url(_ path: String) -> URL { - URL(string: "\(scheme)://\(host):\(port)\(path)")! + self.url = URL(string: "http://127.0.0.1:8690")! } // Start a monitor process that will terminate the server when our app dies. @@ -111,22 +95,18 @@ actor LlamaServer { "--model", modelPath, "--threads", "\(max(1, Int(ceil(Double(processes) / 3.0 * 2.0))))", "--ctx-size", "\(contextLength)", - "--port", port, + "--port", "8690", "--n-gpu-layers", gpu.available && useGPU ? 
"4" : "0", ] - print("starting llama.cpp server \(process.arguments!.joined(separator: " "))") process.standardInput = FileHandle.nullDevice - // To debug with server's output, comment these 2 lines to inherit stdout. process.standardOutput = FileHandle.nullDevice process.standardError = FileHandle.nullDevice try process.run() - try await waitForServer() - try startAppMonitor(serverPID: process.processIdentifier) let endTime = DispatchTime.now() @@ -138,155 +118,18 @@ actor LlamaServer { } func stopServer() { - if process.isRunning { - process.terminate() - } - if monitor.isRunning { - monitor.terminate() - } - } - - @available(*, deprecated, message: "use complete(prompt:stop:temperature) instead") - func complete( - prompt: String, stop: [String]?, temperature: Double?, - progressHandler: (@Sendable (String) -> Void)? = nil - ) async throws -> CompleteResponse { - #if DEBUG - print("START PROMPT\n \(prompt) \nEND PROMPT\n\n") - #endif - - let start = CFAbsoluteTimeGetCurrent() - try await startServer() - - // hit server for completion - let params = CompleteParams(prompt: prompt, stop: stop, temperature: temperature) - let request = buildRequest(path: "/completion", completeParams: params) - - // Use EventSource to receive server sent events - eventSource = EventSource(request: request) - eventSource!.connect() - - var response = "" - var responseDiff = 0.0 - var stopResponse: StopResponse? - listenLoop: for await event in eventSource!.events { - switch event { - case .open: - continue listenLoop - case .error(let error): - print("llama.cpp EventSource server error:", error.localizedDescription) - case .message(let message): - // parse json in message.data string then print the data.content value and append it to response - if let data = message.data?.data(using: .utf8) { - let decoder = JSONDecoder() - - do { - let responseObj = try decoder.decode(Response.self, from: data) - let fragment = responseObj.content - response.append(fragment) - progressHandler?(fragment) - if responseDiff == 0 { - responseDiff = CFAbsoluteTimeGetCurrent() - start - } - - if responseObj.stop { - do { - stopResponse = try decoder.decode(StopResponse.self, from: data) - } catch { - print("error decoding stopResponse", error as Any, data) - } - #if DEBUG - print( - "server.cpp stopResponse", - NSString(data: data, encoding: String.Encoding.utf8.rawValue) ?? "missing") - #endif - break listenLoop - } - } catch { - print("error decoding responseObj", error as Any, data) - break listenLoop - } - } - case .closed: - print("llama.cpp EventSource closed") - break listenLoop - } - } - - if responseDiff > 0 { - print("response: \(response)") - print("\n\n🦙 started response in \(responseDiff) seconds") - } - - // adding a trailing quote or space is a common mistake with the smaller model output - let cleanText = removeUnmatchedTrailingQuote(response).trimmingCharacters( - in: .whitespacesAndNewlines) - - let modelName = stopResponse?.model.split(separator: "/").last?.map { String($0) }.joined() - return CompleteResponse( - text: cleanText, - responseStartSeconds: responseDiff, - predictedPerSecond: stopResponse?.timings.predicted_per_second, - modelName: modelName, - nPredicted: stopResponse?.tokens_predicted - ) - } - - func complete(prompt: String, stop: [String]?, temperature: Double?) 
-    throws -> AsyncStream<String> {
-    let params = CompleteParams(prompt: prompt, stop: stop, temperature: temperature)
-    let request = buildRequest(path: "/completion", completeParams: params)
-
-    return AsyncStream { continuation in
-      Task.detached {
-        let eventSource = EventSource(request: request)
-        eventSource.connect()
-
-        L: for await event in eventSource.events {
-          guard await !self.interrupted else { break L }
-          switch event {
-          case .open: continue
-          case .error(let error):
-            print("llama.cpp EventSource server error:", error.localizedDescription)
-            break L
-          case .message(let message):
-            if let response = try Response.from(data: message.data?.data(using: .utf8)) {
-              continuation.yield(response.content)
-              if response.stop { break L }
-            }
-          case .closed:
-            print("llama.cpp EventSource closed")
-            break L
-          }
-        }
-
-        await eventSource.close()
-        continuation.finish()
-      }
-    }
-  }
-
-  private func buildRequest(path: String, completeParams: CompleteParams) -> URLRequest {
-    var request = URLRequest(url: self.url(path))
-    request.httpMethod = "POST"
-    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
-    request.setValue("text/event-stream", forHTTPHeaderField: "Accept")
-    request.setValue("keep-alive", forHTTPHeaderField: "Connection")
-    request.httpBody = completeParams.toJSON().data(using: .utf8)
-
-    return request
+    if process.isRunning { process.terminate() }
+    if monitor.isRunning { monitor.terminate() }
   }
 
   func interrupt() async {
     if let eventSource, eventSource.readyState != .closed {
       await eventSource.close()
     }
-    interrupted = true
   }
 
   private func waitForServer() async throws {
     guard process.isRunning else { return }
-    interrupted = false
     serverErrorMessage = ""
 
     guard let modelPath = self.modelPath else { return }
@@ -294,7 +137,7 @@ actor LlamaServer {
       modelPath.split(separator: "/").last?.map { String($0) }.joined() ?? "Unknown model name"
 
     let serverHealth = ServerHealth()
-    await serverHealth.updateURL(url("/health"))
+    await serverHealth.updateURL(url.appendingPathComponent("/health"))
     await serverHealth.check()
 
     var timeout = 60
@@ -315,87 +158,6 @@ actor LlamaServer {
       }
     }
   }
-
-  struct CompleteResponse {
-    var text: String
-    var responseStartSeconds: Double
-    var predictedPerSecond: Double?
-    var modelName: String?
-    var nPredicted: Int?
-  }
-
-  struct CompleteParams: Codable {
-    var prompt: String
-    var stop: [String] = ["</s>"]
-    var stream = true
-    var n_threads = 6
-
-    var n_predict = -1
-    var temperature = DEFAULT_TEMP
-    var repeat_last_n = 128 // 0 = disable penalty, -1 = context size
-    var repeat_penalty = 1.18 // 1.0 = disabled
-    var top_k = 40 // <= 0 to use vocab size
-    var top_p = 0.95 // 1.0 = disabled
-    var tfs_z = 1.0 // 1.0 = disabled
-    var typical_p = 1.0 // 1.0 = disabled
-    var presence_penalty = 0.0 // 0.0 = disabled
-    var frequency_penalty = 0.0 // 0.0 = disabled
-    var mirostat = 0 // 0/1/2
-    var mirostat_tau = 5 // target entropy
-    var mirostat_eta = 0.1 // learning rate
-    var cache_prompt = true
-
-    init(prompt: String, stop: [String]?, temperature: Double?) {
-      self.prompt = prompt
-      self.stop = stop ?? [
-        "</s>",
-        "\n\(Message.USER_SPEAKER_ID):",
-        "\n\(Message.USER_SPEAKER_ID.lowercased()):",
-        "[/INST]",
-        "[INST]",
-        "USER:",
-      ]
-      self.temperature = temperature ?? Agent.DEFAULT_TEMP
-    }
-
-    func toJSON() -> String {
-      let encoder = JSONEncoder()
-      encoder.outputFormatting = .prettyPrinted
-      let jsonData = try? encoder.encode(self)
-      return String(data: jsonData!, encoding: .utf8)! 
- } - } - - struct Timings: Codable { - let prompt_n: Int - let prompt_ms: Double - let prompt_per_token_ms: Double - let prompt_per_second: Double? - - let predicted_n: Int - let predicted_ms: Double - let predicted_per_token_ms: Double - let predicted_per_second: Double? - } - - struct Response: Codable { - let content: String - let stop: Bool - - static func from(data: Data?) throws -> Response? { - guard let data else { return nil } - let decoder = JSONDecoder() - return try decoder.decode(Response.self, from: data) - } - } - - struct StopResponse: Codable { - let content: String - let model: String - let tokens_predicted: Int - let tokens_evaluated: Int - let timings: Timings - } } enum LlamaServerError: LocalizedError { diff --git a/mac/FreeChat/Models/NPC/OllamaBackend.swift b/mac/FreeChat/Models/NPC/OpenAIBackend.swift similarity index 81% rename from mac/FreeChat/Models/NPC/OllamaBackend.swift rename to mac/FreeChat/Models/NPC/OpenAIBackend.swift index d30786d..5b1c2e4 100644 --- a/mac/FreeChat/Models/NPC/OllamaBackend.swift +++ b/mac/FreeChat/Models/NPC/OpenAIBackend.swift @@ -1,12 +1,28 @@ // -// OllamaBackend.swift +// OpenAIBackend.swift // FreeChat // import Foundation import EventSource -actor OllamaBackend { +actor OpenAIBackend { + + enum BackendType: String, CaseIterable { + case local = "local" + case llama = "llama" + case openai = "openai" + case ollama = "ollama" + + var defaultURL: URL { + switch self { + case .local: return URL(string: "http://127.0.0.1:8690")! + case .llama: return URL(string: "http://127.0.0.1:8690")! + case .ollama: return URL(string: "http://127.0.0.1:11434")! + case .openai: return URL(string: "https://api.openai.com")! + } + } + } struct RoleMessage: Codable { let role: String @@ -74,14 +90,24 @@ actor OllamaBackend { } } + struct ResponseSummary { + var text: String + var responseStartSeconds: Double + var predictedPerSecond: Double? + var modelName: String? + var nPredicted: Int? + } + private var interrupted = false private let contextLength: Int private let baseURL: URL + private let backendType: BackendType - init(contextLength: Int, tls: Bool, host: String, port: String) { + init(backend: BackendType, contextLength: Int, tls: Bool, host: String, port: String) { self.contextLength = contextLength self.baseURL = URL(string: "\(tls ? "https" : "http")://\(host):\(port)")! 
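+    // Record which backend flavor this client targets (local, llama.cpp, Ollama, or OpenAI).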
+ self.backendType = backend } func complete(messages: [String]) throws -> AsyncStream { @@ -116,21 +142,21 @@ actor OllamaBackend { } } - await eventSource.close() continuation.finish() + await eventSource.close() } } } func interrupt() async { interrupted = true } - func buildRequest(url: URL, params: CompleteParams) -> URLRequest { + func buildRequest(url: URL, params: CompleteParams, token: String = "none") -> URLRequest { var request = URLRequest(url: url) request.httpMethod = "POST" request.setValue("application/json", forHTTPHeaderField: "Content-Type") request.setValue("text/event-stream", forHTTPHeaderField: "Accept") request.setValue("keep-alive", forHTTPHeaderField: "Connection") - request.setValue("Bearer: none", forHTTPHeaderField: "Authorization") + request.setValue("Bearer: \(token)", forHTTPHeaderField: "Authorization") request.httpBody = params.toJSON().data(using: .utf8) return request @@ -164,7 +190,7 @@ actor OllamaBackend { } } - nonisolated func fetchModels() async throws -> TagsResponse { + nonisolated func fetchOllamaModels() async throws -> TagsResponse { let url = baseURL.appendingPathComponent("/api/tags") let (data, _) = try await URLSession.shared.data(from: url) return try TagsResponse.from(data: data) diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index 644bb73..cd84eee 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -91,13 +91,13 @@ struct ConversationView: View, Sendable { } } } - .padding(.vertical, 12) - .onReceive( + .padding(.vertical, 12) + .onReceive( agent.$pendingMessage.throttle(for: .seconds(0.1), scheduler: RunLoop.main, latest: true) ) { text in pendingMessageText = text } - .onReceive( + .onReceive( agent.$pendingMessage.throttle(for: .seconds(0.2), scheduler: RunLoop.main, latest: true) ) { _ in DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) { @@ -105,10 +105,13 @@ struct ConversationView: View, Sendable { } } } - .textSelection(.enabled) - .safeAreaInset(edge: .bottom, spacing: 0) { + .textSelection(.enabled) + .safeAreaInset(edge: .bottom, spacing: 0) { MessageTextField { s in - submit(s) + Task { + // TODO: Disable or cancel if busy + await submit(s) + } } } .frame(maxWidth: .infinity) @@ -117,12 +120,12 @@ struct ConversationView: View, Sendable { .onChange(of: selectedModelId) { showConversation(conversation, modelId: $0) } .navigationTitle(conversation.titleWithDefault) .alert(isPresented: $showErrorAlert, error: llamaError) { _ in - Button("OK") { - llamaError = nil + Button("OK") { + llamaError = nil + } + } message: { error in + Text(error.recoverySuggestion ?? "") } - } message: { error in - Text(error.recoverySuggestion ?? 
"") - } .background(Color.textBackground) } @@ -158,9 +161,7 @@ struct ConversationView: View, Sendable { } private func initializeServerLocal(modelId: String) async { - guard let id = UUID(uuidString: modelId) - else { return } - + guard let id = UUID(uuidString: modelId) else { return } let llamaPath = await agent.llama.modelPath let req = Model.fetchRequest() req.predicate = NSPredicate(format: "id == %@", id as CVarArg) @@ -169,6 +170,9 @@ struct ConversationView: View, Sendable { modelPath != llamaPath { await agent.llama.stopServer() agent.llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) + // TODO: Check the backend type + let backendURL = OpenAIBackend.BackendType.local.defaultURL + agent.createBackend(contextLength: contextLength, tls: false, host: backendURL.host()!, port: "\(backendURL.port!)") } } @@ -178,8 +182,7 @@ struct ConversationView: View, Sendable { let port = serverPort else { return } await agent.llama.stopServer() - agent.llama = LlamaServer(contextLength: contextLength, tls: tls, host: host, port: port) - agent.backend = OllamaBackend(contextLength: contextLength, tls: tls, host: host, port: port) + agent.createBackend(contextLength: contextLength, tls: tls, host: host, port: port) } private func scrollToLastIfRecent(_ proxy: ScrollViewProxy) { @@ -219,17 +222,9 @@ struct ConversationView: View, Sendable { showErrorAlert = true } - func submit(_ input: String) { + func submit(_ input: String) async { if (agent.status == .processing || agent.status == .coldProcessing) { - Task { - await agent.interrupt() - - Task.detached(priority: .userInitiated) { - try? await Task.sleep(for: .seconds(1)) - await submit(input) - } - } - return + await agent.interrupt() } playSendSound() @@ -281,45 +276,41 @@ struct ConversationView: View, Sendable { } } - Task { - var response: LlamaServer.CompleteResponse + let response: OpenAIBackend.ResponseSummary + do { + response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, messages: messageTexts, template: model.template, temperature: temperature) + } catch let error as LlamaServerError { + handleResponseError(error) + return + } catch { + print("agent listen threw unexpected error", error as Any) + return + } + + await MainActor.run { + m.text = response.text + m.predictedPerSecond = response.predictedPerSecond ?? -1 + m.responseStartSeconds = response.responseStartSeconds + m.nPredicted = Int64(response.nPredicted ?? -1) + m.modelName = response.modelName + m.updatedAt = Date() + + playReceiveSound() do { - response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, messages: messageTexts, template: model.template, temperature: temperature) - } catch let error as LlamaServerError { - handleResponseError(error) - return + try viewContext.save() } catch { - print("agent listen threw unexpected error", error as Any) - return + print("error creating message", error.localizedDescription) } - await MainActor.run { - m.text = response.text - m.predictedPerSecond = response.predictedPerSecond ?? -1 - m.responseStartSeconds = response.responseStartSeconds - m.nPredicted = Int64(response.nPredicted ?? 
-1) - m.modelName = response.modelName - m.updatedAt = Date() - - playReceiveSound() - do { - try viewContext.save() - } catch { - print("error creating message", error.localizedDescription) - } - - if pendingMessage?.text != nil, - !pendingMessage!.text!.isEmpty, - response.text.hasPrefix(agent.pendingMessage), - m == pendingMessage { - pendingMessage = nil - agent.pendingMessage = "" - } - - if conversation != agentConversation { - return - } + if pendingMessage?.text != nil, + !pendingMessage!.text!.isEmpty, + response.text.hasPrefix(agent.pendingMessage), + m == pendingMessage { + pendingMessage = nil + agent.pendingMessage = "" + } + if conversation == agentConversation { messages = agentConversation.orderedMessages } } From ea6a1d56be5a8c987a093c668b33d4b3096d0ebb Mon Sep 17 00:00:00 2001 From: shavit Date: Sun, 3 Mar 2024 17:13:30 -0500 Subject: [PATCH 08/21] Add backend type picker --- mac/FreeChat/Models/NPC/OpenAIBackend.swift | 8 +-- .../ConversationView/ConversationView.swift | 8 ++- .../Views/Settings/AISettingsView.swift | 65 ++++++++++--------- 3 files changed, 45 insertions(+), 36 deletions(-) diff --git a/mac/FreeChat/Models/NPC/OpenAIBackend.swift b/mac/FreeChat/Models/NPC/OpenAIBackend.swift index 5b1c2e4..fda2933 100644 --- a/mac/FreeChat/Models/NPC/OpenAIBackend.swift +++ b/mac/FreeChat/Models/NPC/OpenAIBackend.swift @@ -9,10 +9,10 @@ import EventSource actor OpenAIBackend { enum BackendType: String, CaseIterable { - case local = "local" - case llama = "llama" - case openai = "openai" - case ollama = "ollama" + case local = "This Computer (default)" + case llama = "Llama.cpp" + case openai = "OpenAI" + case ollama = "Ollama" var defaultURL: URL { switch self { diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index cd84eee..7ea56b6 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -41,9 +41,9 @@ struct ConversationView: View, Sendable { conversationManager.agent } + // TODO: Use different lists for the remote backends var selectedModel: Model? { - if selectedModelId != AISettingsView.remoteModelOption, - let selectedModelId = self.selectedModelId { + if let selectedModelId { models.first(where: { $0.id?.uuidString == selectedModelId }) } else { models.first @@ -152,11 +152,15 @@ struct ConversationView: View, Sendable { // warmup the agent if it's cold or model has changed Task { + // TODO: Use backend type instead + /* if selectedModelId == AISettingsView.remoteModelOption { await initializeServerRemote() } else { await initializeServerLocal(modelId: selectedModelId) } + */ + await initializeServerRemote() } } diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift index 17c4e43..42f2631 100644 --- a/mac/FreeChat/Views/Settings/AISettingsView.swift +++ b/mac/FreeChat/Views/Settings/AISettingsView.swift @@ -11,8 +11,6 @@ import SwiftUI struct AISettingsView: View { static let title = "Intelligence" private static let customizeModelsId = "customizeModels" - static let remoteModelOption = "remoteModelOption" - private let serverHealthTimer = Timer.publish(every: 3, on: .main, in: .common).autoconnect() @Environment(\.managedObjectContext) private var viewContext @@ -23,6 +21,7 @@ struct AISettingsView: View { animation: .default) private var models: FetchedResults + @AppStorage("backendTypeID") private var backendTypeID: String? 
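+  // Persisted backend selection; nil is treated as the local backend on first launch.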
@AppStorage("selectedModelId") private var selectedModelId: String? @AppStorage("systemPrompt") private var systemPrompt = DEFAULT_SYSTEM_PROMPT @AppStorage("contextLength") private var contextLength = DEFAULT_CONTEXT_LENGTH @@ -35,7 +34,6 @@ struct AISettingsView: View { @State var pickedModel: String? // Picker selection @State var customizeModels = false // Show add remove models - @State var editRemoteModel = false // Show remote model server @State var editSystemPrompt = false @State var editFormat = false @State var revealAdvanced = false @@ -46,6 +44,10 @@ struct AISettingsView: View { @StateObject var gpu = GPU.shared + private var isUsingLocalServer: Bool { + backendTypeID == OpenAIBackend.BackendType.local.rawValue + } + let contextLengthFormatter: NumberFormatter = { let formatter = NumberFormatter() formatter.minimum = 1 @@ -90,6 +92,21 @@ struct AISettingsView: View { } } + var backendTypePicker: some View { + HStack { + Picker("Backend", selection: $backendTypeID) { + ForEach(OpenAIBackend.BackendType.allCases, id: \.self) { name in + Text(name.rawValue).tag(name.rawValue as String?) + } + } + .onAppear { + if backendTypeID == nil { + backendTypeID = OpenAIBackend.BackendType.local.rawValue + } + } + } + } + var modelPicker: some View { VStack(alignment: .leading) { Picker("Model", selection: $pickedModel) { @@ -101,21 +118,17 @@ struct AISettingsView: View { } } - Divider().tag(nil as String?) - Text("Remote Model (Advanced)").tag(AISettingsView.remoteModelOption as String?) - Text("Add or Remove Models...").tag(AISettingsView.customizeModelsId as String?) - }.onReceive(Just(pickedModel)) { _ in + if isUsingLocalServer { + Divider().tag(nil as String?) + Text("Add or Remove Models...").tag(AISettingsView.customizeModelsId as String?) + } + } + .onReceive(Just(pickedModel)) { _ in switch pickedModel { case AISettingsView.customizeModelsId: customizeModels = true - editRemoteModel = false - case AISettingsView.remoteModelOption: - customizeModels = false - editRemoteModel = true - selectedModelId = AISettingsView.remoteModelOption case .some(let pickedModelValue): customizeModels = false - editRemoteModel = false selectedModelId = pickedModelValue default: break } @@ -124,21 +137,14 @@ struct AISettingsView: View { switch pickedModel { case AISettingsView.customizeModelsId: customizeModels = true - editRemoteModel = false - case AISettingsView.remoteModelOption: - customizeModels = false - editRemoteModel = true - selectedModelId = AISettingsView.remoteModelOption case .some(let pickedModelValue): customizeModels = false - editRemoteModel = false selectedModelId = pickedModelValue default: break } - } - if !editRemoteModel { + if isUsingLocalServer { Text( "The default model is general purpose, small, and works on most computers. Larger models are slower but wiser. Some models specialize in certain tasks like coding Python. FreeChat is compatible with most models in GGUF format. [Find new models](https://huggingface.co/models?search=GGUF)" ) @@ -154,7 +160,7 @@ struct AISettingsView: View { Text("Prompt format: \(model.template.format.rawValue)") .foregroundColor(Color(NSColor.secondaryLabelColor)) .font(.caption) - } else if editRemoteModel { + } else if !isUsingLocalServer { Text("Prompt format: \(remoteModelTemplate ?? 
TemplateFormat.vicuna.rawValue)") .foregroundColor(Color(NSColor.secondaryLabelColor)) .font(.caption) @@ -170,7 +176,8 @@ struct AISettingsView: View { content: { if let model = selectedModel { EditFormat(model: model) - } else if editRemoteModel { + } else if !isUsingLocalServer { + // TODO: Check if the editor and model name are needed here EditFormat(modelName: "Remote") } }) @@ -266,8 +273,9 @@ struct AISettingsView: View { Form { Section { systemPromptEditor + backendTypePicker modelPicker - if editRemoteModel { + if !isUsingLocalServer { sectionRemoteModel } } @@ -288,7 +296,7 @@ struct AISettingsView: View { .padding(.top, 2.5) .padding(.bottom, 4) - if !editRemoteModel { + if isUsingLocalServer { Divider() HStack { @@ -311,7 +319,7 @@ struct AISettingsView: View { .frame(width: 24, alignment: .trailing) }.padding(.top, 1) - if gpu.available && !editRemoteModel { + if gpu.available && isUsingLocalServer { Divider() Toggle("Use GPU Acceleration", isOn: $useGPU).padding(.top, 1) @@ -342,7 +350,7 @@ struct AISettingsView: View { .onSubmit(saveFormRemoteServer) .navigationTitle(AISettingsView.title) .onAppear { - if selectedModelId != AISettingsView.remoteModelOption { + if isUsingLocalServer { let selectedModelExists = models .compactMap({ $0.id?.uuidString }) @@ -352,7 +360,6 @@ struct AISettingsView: View { } } pickedModel = selectedModelId - inputServerTLS = serverTLS inputServerHost = serverHost ?? "" inputServerPort = serverPort ?? "" @@ -395,8 +402,6 @@ struct AISettingsView: View { serverPort = inputServerPort serverHealthScore = -1 updateRemoteServerURL() - - selectedModelId = AISettingsView.remoteModelOption } private func updateRemoteServerURL() { From 76e3e45a9b9fa406af019e5e3cfdd10eb760133f Mon Sep 17 00:00:00 2001 From: shavit Date: Mon, 4 Mar 2024 15:27:02 -0500 Subject: [PATCH 09/21] Upgrade EventSource --- mac/FreeChat/Models/NPC/Agent.swift | 1 - mac/FreeChat/Models/NPC/LlamaServer.swift | 8 -------- mac/FreeChat/Models/NPC/OpenAIBackend.swift | 9 ++++----- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index 2f29e3c..ffa689f 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -58,7 +58,6 @@ class Agent: ObservableObject { func interrupt() async { if status != .processing, status != .coldProcessing { return } - await llama.interrupt() await backend?.interrupt() } diff --git a/mac/FreeChat/Models/NPC/LlamaServer.swift b/mac/FreeChat/Models/NPC/LlamaServer.swift index 325839b..daa7c5e 100644 --- a/mac/FreeChat/Models/NPC/LlamaServer.swift +++ b/mac/FreeChat/Models/NPC/LlamaServer.swift @@ -1,4 +1,3 @@ -@preconcurrency import EventSource import Foundation import SwiftUI import os.lock @@ -35,7 +34,6 @@ actor LlamaServer { private var process = Process() private var serverUp = false private var serverErrorMessage = "" - private var eventSource: EventSource? 
private let url: URL private var monitor = Process() @@ -122,12 +120,6 @@ actor LlamaServer { if monitor.isRunning { monitor.terminate() } } - func interrupt() async { - if let eventSource, eventSource.readyState != .closed { - await eventSource.close() - } - } - private func waitForServer() async throws { guard process.isRunning else { return } serverErrorMessage = "" diff --git a/mac/FreeChat/Models/NPC/OpenAIBackend.swift b/mac/FreeChat/Models/NPC/OpenAIBackend.swift index fda2933..314aed5 100644 --- a/mac/FreeChat/Models/NPC/OpenAIBackend.swift +++ b/mac/FreeChat/Models/NPC/OpenAIBackend.swift @@ -120,10 +120,10 @@ actor OpenAIBackend { return AsyncStream { continuation in Task.detached { - let eventSource = EventSource(request: request) - eventSource.connect() + let eventSource = EventSource() + let dataTask = eventSource.dataTask(for: request) - L: for await event in eventSource.events { + L: for await event in dataTask.events() { guard await !self.interrupted else { break L } switch event { case .open: continue @@ -143,12 +143,11 @@ actor OpenAIBackend { } continuation.finish() - await eventSource.close() } } } - func interrupt() async { interrupted = true } + func interrupt() { interrupted = true } func buildRequest(url: URL, params: CompleteParams, token: String = "none") -> URLRequest { var request = URLRequest(url: url) From dc88b741a084930a2d4c6be74bb518d377b4a6c9 Mon Sep 17 00:00:00 2001 From: shavit Date: Mon, 4 Mar 2024 15:29:24 -0500 Subject: [PATCH 10/21] Move the prompt editor to the same position --- .../Views/Settings/AISettingsView.swift | 94 +++++++++---------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift index 42f2631..80cf9bf 100644 --- a/mac/FreeChat/Views/Settings/AISettingsView.swift +++ b/mac/FreeChat/Views/Settings/AISettingsView.swift @@ -107,6 +107,33 @@ struct AISettingsView: View { } } + var editPromptFormat: some View { + HStack { + if let model = selectedModel { + Text("Prompt format: \(model.template.format.rawValue)") + .foregroundColor(Color(NSColor.secondaryLabelColor)) + .font(.caption) + } else if !isUsingLocalServer { + Text("Prompt format: \(remoteModelTemplate ?? TemplateFormat.vicuna.rawValue)") + .foregroundColor(Color(NSColor.secondaryLabelColor)) + .font(.caption) + } + Button("Edit") { + editFormat = true + } + .buttonStyle(.link).font(.caption) + .offset(x: -4) + } + .sheet(isPresented: $editFormat) { + if let model = selectedModel { + EditFormat(model: model) + } else if !isUsingLocalServer { + // TODO: Check if the editor and model name are needed here + EditFormat(modelName: "Remote") + } + } + } + var modelPicker: some View { VStack(alignment: .leading) { Picker("Model", selection: $pickedModel) { @@ -153,34 +180,17 @@ struct AISettingsView: View { .lineLimit(5) .fixedSize(horizontal: false, vertical: true) .padding(.top, 0.5) + } else { + Text( + "If you have access to a powerful server, you may want to run your model there. Enter the host and port to connect to a remote llama.cpp server. 
Instructions for running the server can be found [here](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md)" + ) + .font(.callout) + .foregroundColor(Color(NSColor.secondaryLabelColor)) + .lineLimit(5) + .fixedSize(horizontal: false, vertical: true) + .padding(.top, 0.5) } - - HStack { - if let model = selectedModel { - Text("Prompt format: \(model.template.format.rawValue)") - .foregroundColor(Color(NSColor.secondaryLabelColor)) - .font(.caption) - } else if !isUsingLocalServer { - Text("Prompt format: \(remoteModelTemplate ?? TemplateFormat.vicuna.rawValue)") - .foregroundColor(Color(NSColor.secondaryLabelColor)) - .font(.caption) - } - Button("Edit") { - editFormat = true - } - .buttonStyle(.link).font(.caption) - .offset(x: -4) - } - .sheet( - isPresented: $editFormat, - content: { - if let model = selectedModel { - EditFormat(model: model) - } else if !isUsingLocalServer { - // TODO: Check if the editor and model name are needed here - EditFormat(modelName: "Remote") - } - }) + editPromptFormat } } @@ -237,16 +247,8 @@ struct AISettingsView: View { } } - var sectionRemoteModel: some View { + var sectionRemoteBackend: some View { Group { - Text( - "If you have access to a powerful server, you may want to run your model there. Enter the host and port to connect to a remote llama.cpp server. Instructions for running the server can be found [here](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md)" - ) - .font(.callout) - .foregroundColor(Color(NSColor.secondaryLabelColor)) - .lineLimit(5) - .fixedSize(horizontal: false, vertical: true) - .padding(.top, 0.5) HStack { TextField("Server host", text: $inputServerHost, prompt: Text("yourserver.net")) .textFieldStyle(.plain) @@ -276,7 +278,7 @@ struct AISettingsView: View { backendTypePicker modelPicker if !isUsingLocalServer { - sectionRemoteModel + sectionRemoteBackend } } Section { @@ -296,20 +298,16 @@ struct AISettingsView: View { .padding(.top, 2.5) .padding(.bottom, 4) - if isUsingLocalServer { - Divider() - - HStack { - Text("Context Length") - TextField("", value: $contextLength, formatter: contextLengthFormatter) - .padding(.vertical, -8) - .padding(.trailing, -10) - } - .padding(.top, 0.5) + Divider() + HStack { + Text("Context Length") + TextField("", value: $contextLength, formatter: contextLengthFormatter) + .padding(.vertical, -8) + .padding(.trailing, -10) } + .padding(.top, 0.5) Divider() - HStack { Text("Temperature") Slider(value: $temperature, in: 0...2, step: 0.1).offset(y: 1) From 52fd9d4d6ff85cc184b0779861914b0dbe4b12b4 Mon Sep 17 00:00:00 2001 From: shavit Date: Mon, 4 Mar 2024 15:51:26 -0500 Subject: [PATCH 11/21] Clean unmatched quotes from the backend output --- mac/FreeChat.xcodeproj/project.pbxproj | 4 ++++ mac/FreeChat/Models/NPC/OpenAIBackend.swift | 2 +- .../Models/NPC/String+TrimQuotes.swift | 23 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 mac/FreeChat/Models/NPC/String+TrimQuotes.swift diff --git a/mac/FreeChat.xcodeproj/project.pbxproj b/mac/FreeChat.xcodeproj/project.pbxproj index dc44399..f953169 100644 --- a/mac/FreeChat.xcodeproj/project.pbxproj +++ b/mac/FreeChat.xcodeproj/project.pbxproj @@ -59,6 +59,7 @@ A1F617582A7836AE00F2048C /* Message+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1F617572A7836AE00F2048C /* Message+Extensions.swift */; }; A1F6175B2A7838F700F2048C /* Conversation+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1F6175A2A7838F700F2048C /* 
Conversation+Extensions.swift */; }; DE16617B2B8A40D100826556 /* OpenAIBackend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE16617A2B8A40D100826556 /* OpenAIBackend.swift */; }; + DE7250E12B966D23006A76DF /* String+TrimQuotes.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE7250E02B966D22006A76DF /* String+TrimQuotes.swift */; }; DEA8CF572B51938B007A4CE7 /* FreeChatAppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA8CF562B51938B007A4CE7 /* FreeChatAppDelegate.swift */; }; DEEA39CC2B586F3800992592 /* ServerHealth.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEEA39CB2B586F3800992592 /* ServerHealth.swift */; }; /* End PBXBuildFile section */ @@ -178,6 +179,7 @@ A1F617572A7836AE00F2048C /* Message+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Message+Extensions.swift"; sourceTree = ""; }; A1F6175A2A7838F700F2048C /* Conversation+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Conversation+Extensions.swift"; sourceTree = ""; }; DE16617A2B8A40D100826556 /* OpenAIBackend.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = OpenAIBackend.swift; path = FreeChat/Models/NPC/OpenAIBackend.swift; sourceTree = SOURCE_ROOT; }; + DE7250E02B966D22006A76DF /* String+TrimQuotes.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "String+TrimQuotes.swift"; sourceTree = ""; }; DEA8CF562B51938B007A4CE7 /* FreeChatAppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FreeChatAppDelegate.swift; sourceTree = ""; }; DEEA39CB2B586F3800992592 /* ServerHealth.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ServerHealth.swift; sourceTree = ""; }; /* End PBXFileReference section */ @@ -263,6 +265,7 @@ A17A2E132A79A005006CDD90 /* LlamaServer.swift */, DE16617A2B8A40D100826556 /* OpenAIBackend.swift */, DEEA39CB2B586F3800992592 /* ServerHealth.swift */, + DE7250E02B966D22006A76DF /* String+TrimQuotes.swift */, A137A3822AB4FD4800BE1AE0 /* freechat-server */, A1A286F32A7E17750004967A /* server-watchdog */, A1A286F92A7E197F0004967A /* README.md */, @@ -599,6 +602,7 @@ A16FFF8B2B2E35D200E6AAE2 /* GPU.swift in Sources */, A18A8BB32B24FC0400D2197C /* AISettingsView.swift in Sources */, A1F617562A782E4F00F2048C /* ConversationView.swift in Sources */, + DE7250E12B966D23006A76DF /* String+TrimQuotes.swift in Sources */, A13C8C5A2A8FEEE400EC18D8 /* SplashCodeSyntaxHighlighter.swift in Sources */, A15D50D42A80BCA900FC1681 /* SettingsView.swift in Sources */, ); diff --git a/mac/FreeChat/Models/NPC/OpenAIBackend.swift b/mac/FreeChat/Models/NPC/OpenAIBackend.swift index 314aed5..c3054f9 100644 --- a/mac/FreeChat/Models/NPC/OpenAIBackend.swift +++ b/mac/FreeChat/Models/NPC/OpenAIBackend.swift @@ -133,7 +133,7 @@ actor OpenAIBackend { case .message(let message): if let response = try Response.from(data: message.data?.data(using: .utf8)), let choice = response.choices.first { - continuation.yield(choice.delta.content) + continuation.yield(choice.delta.content.removeUnmatchedTrailingQuote()) if choice.finishReason != nil { break L } } case .closed: diff --git a/mac/FreeChat/Models/NPC/String+TrimQuotes.swift b/mac/FreeChat/Models/NPC/String+TrimQuotes.swift new file mode 100644 index 0000000..c71844c --- /dev/null +++ b/mac/FreeChat/Models/NPC/String+TrimQuotes.swift @@ -0,0 +1,23 @@ +// +// String+TrimQuotes.swift +// FreeChat +// + +import 
Foundation + +extension String { + func removeUnmatchedTrailingQuote() -> String { + guard self.last == "\"" else { return self } + + // Count the number of quotes in the string + let countOfQuotes = self.filter({ $0 == "\"" }).count + guard countOfQuotes % 2 != 0 else { return self } + var outputString = self + // If there is an odd number of quotes, remove the last one + if let indexOfLastQuote = self.lastIndex(of: "\"") { + outputString.remove(at: indexOfLastQuote) + } + + return outputString + } +} From 18882b2f610475b112611c148318b6a6a8a7419a Mon Sep 17 00:00:00 2001 From: shavit Date: Mon, 4 Mar 2024 16:01:06 -0500 Subject: [PATCH 12/21] Initialize backend based on the backend type --- .../Views/ConversationView/ConversationView.swift | 15 +++++++-------- mac/FreeChat/Views/Settings/AISettingsView.swift | 1 - 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index 7ea56b6..fef9ae5 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -10,9 +10,12 @@ import MarkdownUI import Foundation struct ConversationView: View, Sendable { + typealias BackendType = OpenAIBackend.BackendType + @Environment(\.managedObjectContext) private var viewContext @EnvironmentObject private var conversationManager: ConversationManager + @AppStorage("backendTypeID") private var backendTypeID: String? @AppStorage("selectedModelId") private var selectedModelId: String? @AppStorage("systemPrompt") private var systemPrompt: String = DEFAULT_SYSTEM_PROMPT @AppStorage("contextLength") private var contextLength: Int = DEFAULT_CONTEXT_LENGTH @@ -152,15 +155,12 @@ struct ConversationView: View, Sendable { // warmup the agent if it's cold or model has changed Task { - // TODO: Use backend type instead - /* - if selectedModelId == AISettingsView.remoteModelOption { - await initializeServerRemote() - } else { + let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? 
.local + if backendType == .local { await initializeServerLocal(modelId: selectedModelId) + } else { + await initializeServerRemote() } - */ - await initializeServerRemote() } } @@ -174,7 +174,6 @@ struct ConversationView: View, Sendable { modelPath != llamaPath { await agent.llama.stopServer() agent.llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) - // TODO: Check the backend type let backendURL = OpenAIBackend.BackendType.local.defaultURL agent.createBackend(contextLength: contextLength, tls: false, host: backendURL.host()!, port: "\(backendURL.port!)") } diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift index 80cf9bf..a1197a8 100644 --- a/mac/FreeChat/Views/Settings/AISettingsView.swift +++ b/mac/FreeChat/Views/Settings/AISettingsView.swift @@ -128,7 +128,6 @@ struct AISettingsView: View { if let model = selectedModel { EditFormat(model: model) } else if !isUsingLocalServer { - // TODO: Check if the editor and model name are needed here EditFormat(modelName: "Remote") } } From d936121d3291c4e8e56999b8bafc109d733f4883 Mon Sep 17 00:00:00 2001 From: shavit Date: Wed, 6 Mar 2024 18:00:49 -0500 Subject: [PATCH 13/21] Add backends * Create backends for local and remote servers * List models for each backend * Save backend type * Change backends during chat conversations --- mac/FreeChat.xcodeproj/project.pbxproj | 16 ++ mac/FreeChat/Models/NPC/Agent.swift | 28 ++- mac/FreeChat/Models/NPC/Backend.swift | 162 +++++++++++++ mac/FreeChat/Models/NPC/LlamaBackend.swift | 53 +++++ mac/FreeChat/Models/NPC/LlamaServer.swift | 21 -- mac/FreeChat/Models/NPC/LocalBackend.swift | 30 +++ mac/FreeChat/Models/NPC/OllamaBackend.swift | 55 +++++ mac/FreeChat/Models/NPC/OpenAIBackend.swift | 217 +++--------------- .../ConversationView/ConversationView.swift | 78 +++---- .../Views/Settings/AISettingsView.swift | 89 +++++-- 10 files changed, 467 insertions(+), 282 deletions(-) create mode 100644 mac/FreeChat/Models/NPC/Backend.swift create mode 100644 mac/FreeChat/Models/NPC/LlamaBackend.swift create mode 100644 mac/FreeChat/Models/NPC/LocalBackend.swift create mode 100644 mac/FreeChat/Models/NPC/OllamaBackend.swift diff --git a/mac/FreeChat.xcodeproj/project.pbxproj b/mac/FreeChat.xcodeproj/project.pbxproj index f953169..d9c6c52 100644 --- a/mac/FreeChat.xcodeproj/project.pbxproj +++ b/mac/FreeChat.xcodeproj/project.pbxproj @@ -61,6 +61,10 @@ DE16617B2B8A40D100826556 /* OpenAIBackend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE16617A2B8A40D100826556 /* OpenAIBackend.swift */; }; DE7250E12B966D23006A76DF /* String+TrimQuotes.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE7250E02B966D22006A76DF /* String+TrimQuotes.swift */; }; DEA8CF572B51938B007A4CE7 /* FreeChatAppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA8CF562B51938B007A4CE7 /* FreeChatAppDelegate.swift */; }; + DEAE3D482B987DE700257A69 /* Backend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEAE3D472B987DE700257A69 /* Backend.swift */; }; + DEAE3D4A2B987EA400257A69 /* OllamaBackend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEAE3D492B987EA400257A69 /* OllamaBackend.swift */; }; + DEAE3D4C2B987EB300257A69 /* LlamaBackend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEAE3D4B2B987EB300257A69 /* LlamaBackend.swift */; }; + DEAE3D4E2B987EBC00257A69 /* LocalBackend.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEAE3D4D2B987EBC00257A69 /* LocalBackend.swift */; }; DEEA39CC2B586F3800992592 /* 
ServerHealth.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEEA39CB2B586F3800992592 /* ServerHealth.swift */; }; /* End PBXBuildFile section */ @@ -181,6 +185,10 @@ DE16617A2B8A40D100826556 /* OpenAIBackend.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = OpenAIBackend.swift; path = FreeChat/Models/NPC/OpenAIBackend.swift; sourceTree = SOURCE_ROOT; }; DE7250E02B966D22006A76DF /* String+TrimQuotes.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "String+TrimQuotes.swift"; sourceTree = ""; }; DEA8CF562B51938B007A4CE7 /* FreeChatAppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FreeChatAppDelegate.swift; sourceTree = ""; }; + DEAE3D472B987DE700257A69 /* Backend.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Backend.swift; sourceTree = ""; }; + DEAE3D492B987EA400257A69 /* OllamaBackend.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OllamaBackend.swift; sourceTree = ""; }; + DEAE3D4B2B987EB300257A69 /* LlamaBackend.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaBackend.swift; sourceTree = ""; }; + DEAE3D4D2B987EBC00257A69 /* LocalBackend.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalBackend.swift; sourceTree = ""; }; DEEA39CB2B586F3800992592 /* ServerHealth.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ServerHealth.swift; sourceTree = ""; }; /* End PBXFileReference section */ @@ -263,7 +271,11 @@ A137A3872AB502DB00BE1AE0 /* ggml-metal.metal */, A17A2E122A79A005006CDD90 /* Agent.swift */, A17A2E132A79A005006CDD90 /* LlamaServer.swift */, + DEAE3D472B987DE700257A69 /* Backend.swift */, DE16617A2B8A40D100826556 /* OpenAIBackend.swift */, + DEAE3D492B987EA400257A69 /* OllamaBackend.swift */, + DEAE3D4B2B987EB300257A69 /* LlamaBackend.swift */, + DEAE3D4D2B987EBC00257A69 /* LocalBackend.swift */, DEEA39CB2B586F3800992592 /* ServerHealth.swift */, DE7250E02B966D22006A76DF /* String+TrimQuotes.swift */, A137A3822AB4FD4800BE1AE0 /* freechat-server */, @@ -583,13 +595,17 @@ A1F617582A7836AE00F2048C /* Message+Extensions.swift in Sources */, A13C8C682A902A1200EC18D8 /* CGKeycode+Extensions.swift in Sources */, A15D50D22A7F539800FC1681 /* NavList.swift in Sources */, + DEAE3D4C2B987EB300257A69 /* LlamaBackend.swift in Sources */, DE16617B2B8A40D100826556 /* OpenAIBackend.swift in Sources */, A1156D342AD1F5EF00081313 /* Templates.swift in Sources */, A1D4B49D2B9A780B00B9C4BE /* AgentDefaults.swift in Sources */, A1F617262A782AA100F2048C /* FreeChat.swift in Sources */, A1156D2F2AD0954C00081313 /* TemplateManager.swift in Sources */, A1E4A6942A82B41F00BF9D34 /* Model+Extensions.swift in Sources */, + DEAE3D4A2B987EA400257A69 /* OllamaBackend.swift in Sources */, A12B52DE2AA5228100658707 /* EditModels.swift in Sources */, + DEAE3D482B987DE700257A69 /* Backend.swift in Sources */, + DEAE3D4E2B987EBC00257A69 /* LocalBackend.swift in Sources */, A17AB1C22ABB4B5E00CD3100 /* CircleMenuStyle.swift in Sources */, A15D50CF2A7EF73E00FC1681 /* MessageTextField.swift in Sources */, A1CA32442AAF877600F9D488 /* ConversationManager.swift in Sources */, diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index ffa689f..844b6c2 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -21,7 +21,7 @@ class Agent: ObservableObject 
{ // each agent runs their own server var llama: LlamaServer - private var backend: OpenAIBackend! + private var backend: Backend! init(id: String, prompt: String, systemPrompt: String, modelPath: String, contextLength: Int) { self.id = id @@ -30,25 +30,32 @@ class Agent: ObservableObject { llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) } - func createBackend(contextLength: Int, tls: Bool, host: String, port: String) { - self.backend = OpenAIBackend(backend: .ollama, contextLength: contextLength, tls: tls, host: host, port: port) + func createBackend(_ backend: BackendType, contextLength: Int, baseURL: URL, apiKey: String?) { + switch backend { + case .local: + self.backend = LocalBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) + case .llama: + self.backend = LlamaBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) + case .openai: + self.backend = OpenAIBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) + case .ollama: + self.backend = OllamaBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) + } } // this is the main loop of the agent // listen -> respond -> update mental model and save checkpoint // we respond before updating to avoid a long delay after user input - func listenThinkRespond( - speakerId: String, messages: [String], template: Template, temperature: Double? - ) async throws -> OpenAIBackend.ResponseSummary { + func listenThinkRespond(speakerId: String, messages: [String]) async throws -> CompleteResponseSummary { status = status == .cold ? .coldProcessing : .processing pendingMessage = "" - for try await partialResponse in try await backend!.complete(messages: messages) { + for try await partialResponse in try await backend.complete(messages: messages) { self.pendingMessage += partialResponse self.prompt = pendingMessage } status = .ready - return OpenAIBackend.ResponseSummary(text: pendingMessage, responseStartSeconds: 0) + return CompleteResponseSummary(text: pendingMessage, responseStartSeconds: 0) } func handleCompletionProgress(partialResponse: String) { @@ -63,14 +70,11 @@ class Agent: ObservableObject { func warmup() async throws { if prompt.isEmpty, systemPrompt.isEmpty { return } - // TODO: Implement this part - /* do { - _ = try await llama.complete(prompt: prompt, stop: nil, temperature: nil) + _ = try await backend.complete(messages: []) status = .ready } catch { status = .cold } - */ } } diff --git a/mac/FreeChat/Models/NPC/Backend.swift b/mac/FreeChat/Models/NPC/Backend.swift new file mode 100644 index 0000000..26067e5 --- /dev/null +++ b/mac/FreeChat/Models/NPC/Backend.swift @@ -0,0 +1,162 @@ +// +// Backend.swift +// FreeChat +// + +import Foundation +import EventSource + +protocol Backend: Actor, Sendable { + var type: BackendType { get } + var baseURL: URL { get } + var apiKey: String? 
{ get } + var interrupted: Bool { get set } + + func complete(messages: [String]) async throws -> AsyncStream + func buildRequest(path: String, params: CompleteParams) -> URLRequest + func interrupt() async + + func listModels() async throws -> [String] +} + +extension Backend { + func complete(messages: [String]) async throws -> AsyncStream { + let messages = [RoleMessage(role: "system", content: "you know")] + + messages.map({ RoleMessage(role: "user", content: $0) }) + let params = CompleteParams(messages: messages, model: "orca-mini") + let request = buildRequest(path: "/v1/chat/completions", params: params) + self.interrupted = false + + return AsyncStream { continuation in + Task.detached { + let eventSource = EventSource() + let dataTask = eventSource.dataTask(for: request) + L: for await event in dataTask.events() { + guard await !self.interrupted else { break L } + switch event { + case .open: continue + case .error(let error): + print("ollama EventSource server error:", error.localizedDescription) + break L + case .message(let message): + if let response = try CompleteResponse.from(data: message.data?.data(using: .utf8)), + let choice = response.choices.first { + continuation.yield(choice.delta.content.removeUnmatchedTrailingQuote()) + if choice.finishReason != nil { break L } + } + case .closed: + print("ollama EventSource closed") + break L + } + } + + continuation.finish() + } + } + } + + func interrupt() async { interrupted = true } + + func buildRequest(path: String, params: CompleteParams) -> URLRequest { + let apiKey = "" + var request = URLRequest(url: baseURL.appendingPathComponent("/v1/chat/completions")) + request.httpMethod = "POST" + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.setValue("text/event-stream", forHTTPHeaderField: "Accept") + request.setValue("keep-alive", forHTTPHeaderField: "Connection") + request.setValue("Bearer: \(apiKey)", forHTTPHeaderField: "Authorization") + request.httpBody = params.toJSON().data(using: .utf8) + + return request + } +} + +enum BackendType: String, CaseIterable { + case local = "This Computer (default)" + case llama = "Llama.cpp" + case openai = "OpenAI" + case ollama = "Ollama" + + var defaultURL: URL { + switch self { + case .local: return URL(string: "http://127.0.0.1:8690")! + case .llama: return URL(string: "http://127.0.0.1:8690")! + case .ollama: return URL(string: "http://127.0.0.1:11434")! + case .openai: return URL(string: "https://api.openai.com")! + } + } +} + +struct RoleMessage: Codable { + let role: String + let content: String +} + +struct CompleteParams: Encodable { + struct OllamaOptions: Encodable { + enum Mirostat: Int, Encodable { + case disabled = 0 + case v1 = 1 + case v2 = 2 + } + let mirostat: Mirostat + let mirostatETA: Float = 0.1 + let mirostatTAU: Float = 5 + let numCTX = 2048 + let numGQA = 1 + let numGPU: Int? = nil + let numThread: Int? = nil + let repeatLastN = 64 + let repeatPenalty: Float = 1.1 + let temperature: Float = 0.7 + let seed: Int? = nil + let stop: String? = nil + let tfsZ: Float? = nil + let numPredict = 128 + let topK = 40 + let topP: Float = 0.9 + } + let messages: [RoleMessage] + let model: String + let format: String? = nil + let options: OllamaOptions? = nil + let template: String? = nil + let stream = true + let keepAlive = true + + func toJSON() -> String { + let encoder = JSONEncoder() + encoder.keyEncodingStrategy = .convertToSnakeCase + let jsonData = try? encoder.encode(self) + return String(data: jsonData!, encoding: .utf8)! 
+ } +} + +struct CompleteResponse: Decodable { + struct Choice: Decodable { + let index: Int + let delta: RoleMessage + let finishReason: String? + } + let id: String + let object: String + let created: Int + let model: String + let systemFingerprint: String + let choices: [Choice] + + static func from(data: Data?) throws -> CompleteResponse? { + guard let data else { return nil } + let decoder = JSONDecoder() + decoder.keyDecodingStrategy = .convertFromSnakeCase + return try decoder.decode(CompleteResponse.self, from: data) + } +} + +struct CompleteResponseSummary { + var text: String + var responseStartSeconds: Double + var predictedPerSecond: Double? + var modelName: String? + var nPredicted: Int? + } diff --git a/mac/FreeChat/Models/NPC/LlamaBackend.swift b/mac/FreeChat/Models/NPC/LlamaBackend.swift new file mode 100644 index 0000000..b732e4b --- /dev/null +++ b/mac/FreeChat/Models/NPC/LlamaBackend.swift @@ -0,0 +1,53 @@ +// +// LlamaBackend.swift +// FreeChat +// + +import Foundation + +actor LlamaBackend: Backend { + var type: BackendType = .llama + var baseURL: URL + var apiKey: String? + var interrupted = false + + private let contextLength: Int + + init(contextLength: Int, baseURL: URL, apiKey: String?) { + self.contextLength = contextLength + self.baseURL = baseURL + self.apiKey = apiKey + } + + deinit { interrupted = true } + + struct ModelListResponse: Decodable { + struct Model: Decodable { + struct Meta: Decodable { + let nCtxTrain: Int + let nEmbd: Int + let nParams: Int + let nVocab: Int + let size: Int + let vocabType: Int + } + let id: String + let created: Int + let meta: Meta + let object: String + } + let data: [Model] + + static func from(data: Data) throws -> ModelListResponse { + let decoder = JSONDecoder() + decoder.keyDecodingStrategy = .convertFromSnakeCase + return try decoder.decode(ModelListResponse.self, from: data) + } + } + + nonisolated func listModels() async throws -> [String] { + let url = await baseURL.appendingPathComponent("/v1/models") + let (data, _) = try await URLSession.shared.data(from: url) + return try ModelListResponse.from(data: data).data.compactMap({ $0.id.components(separatedBy: "/").last }) + } +} diff --git a/mac/FreeChat/Models/NPC/LlamaServer.swift b/mac/FreeChat/Models/NPC/LlamaServer.swift index daa7c5e..67d1369 100644 --- a/mac/FreeChat/Models/NPC/LlamaServer.swift +++ b/mac/FreeChat/Models/NPC/LlamaServer.swift @@ -2,27 +2,6 @@ import Foundation import SwiftUI import os.lock -func removeUnmatchedTrailingQuote(_ inputString: String) -> String { - var outputString = inputString - if inputString.last != "\"" { return outputString } - - // Count the number of quotes in the string - let countOfQuotes = outputString.reduce( - 0, - { (count, character) -> Int in - return character == "\"" ? count + 1 : count - }) - - // If there is an odd number of quotes, remove the last one - if countOfQuotes % 2 != 0 { - if let indexOfLastQuote = outputString.lastIndex(of: "\"") { - outputString.remove(at: indexOfLastQuote) - } - } - - return outputString -} - actor LlamaServer { var modelPath: String? diff --git a/mac/FreeChat/Models/NPC/LocalBackend.swift b/mac/FreeChat/Models/NPC/LocalBackend.swift new file mode 100644 index 0000000..f24a42d --- /dev/null +++ b/mac/FreeChat/Models/NPC/LocalBackend.swift @@ -0,0 +1,30 @@ +// +// LocalBackend.swift +// FreeChat +// + +import Foundation + +actor LocalBackend: Backend { + var type: BackendType = .local + var baseURL: URL + var apiKey: String? 
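// A quick usage sketch for the llama.cpp model listing above, assuming a
// server is reachable at the default URL. llama.cpp reports model ids as
// file paths, which is why listModels() keeps only the last path component.
func printLlamaModels() async throws {
  let llama = LlamaBackend(contextLength: 2048,
                           baseURL: BackendType.llama.defaultURL,
                           apiKey: nil)
  let names = try await llama.listModels()
  print(names) // e.g. ["mistral-7b-instruct.Q4_K_M.gguf"] -- illustrative only
}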
+ var interrupted = false + + private let contextLength: Int + + init(contextLength: Int, baseURL: URL, apiKey: String?) { + self.contextLength = contextLength + self.baseURL = baseURL + self.apiKey = apiKey + } + + deinit { interrupted = true } + + func listModels() async throws -> [String] { + let req = Model.fetchRequest() + req.sortDescriptors = [NSSortDescriptor(key: "size", ascending: true)] + let context = PersistenceController.shared.container.newBackgroundContext() + return try context.fetch(req).compactMap({ $0.url?.path(percentEncoded: false) }) + } +} diff --git a/mac/FreeChat/Models/NPC/OllamaBackend.swift b/mac/FreeChat/Models/NPC/OllamaBackend.swift new file mode 100644 index 0000000..42a3ba7 --- /dev/null +++ b/mac/FreeChat/Models/NPC/OllamaBackend.swift @@ -0,0 +1,55 @@ +// +// OllamaBackend.swift +// FreeChat +// + +import Foundation + +actor OllamaBackend: Backend { + var type: BackendType = .ollama + var baseURL: URL + var apiKey: String? + var interrupted = false + + private let contextLength: Int + + init(contextLength: Int, baseURL: URL, apiKey: String?) { + self.contextLength = contextLength + self.baseURL = baseURL + self.apiKey = apiKey + } + + deinit { interrupted = true } + + struct TagsResponse: Decodable { + struct Model: Decodable { + struct Details: Decodable { + let parentModel: String? + let format: String + let family: String + let families: [String]? + let parameterSize: String + let quantizationLevel: String + } + let name: String + let model: String + let modifiedAt: String + let size: Int + let digest: String + let details: Details + } + let models: [Model] + + static func from(data: Data) throws -> TagsResponse { + let decoder = JSONDecoder() + decoder.keyDecodingStrategy = .convertFromSnakeCase + return try decoder.decode(TagsResponse.self, from: data) + } + } + + nonisolated func listModels() async throws -> [String] { + let url = await baseURL.appendingPathComponent("/api/tags") + let (data, _) = try await URLSession.shared.data(from: url) + return try TagsResponse.from(data: data).models.map({ $0.name }) + } +} diff --git a/mac/FreeChat/Models/NPC/OpenAIBackend.swift b/mac/FreeChat/Models/NPC/OpenAIBackend.swift index c3054f9..dcebc1d 100644 --- a/mac/FreeChat/Models/NPC/OpenAIBackend.swift +++ b/mac/FreeChat/Models/NPC/OpenAIBackend.swift @@ -4,194 +4,43 @@ // import Foundation -import EventSource -actor OpenAIBackend { - - enum BackendType: String, CaseIterable { - case local = "This Computer (default)" - case llama = "Llama.cpp" - case openai = "OpenAI" - case ollama = "Ollama" - - var defaultURL: URL { - switch self { - case .local: return URL(string: "http://127.0.0.1:8690")! - case .llama: return URL(string: "http://127.0.0.1:8690")! - case .ollama: return URL(string: "http://127.0.0.1:11434")! - case .openai: return URL(string: "https://api.openai.com")! - } - } - } - - struct RoleMessage: Codable { - let role: String - let content: String - } +actor OpenAIBackend: Backend { + var type: BackendType = .openai + let baseURL: URL + let apiKey: String? + var interrupted: Bool = false - struct CompleteParams: Encodable { - struct OllamaOptions: Encodable { - enum Mirostat: Int, Encodable { - case disabled = 0 - case v1 = 1 - case v2 = 2 - } - let mirostat: Mirostat - let mirostatETA: Float = 0.1 - let mirostatTAU: Float = 5 - let numCTX = 2048 - let numGQA = 1 - let numGPU: Int? = nil - let numThread: Int? = nil - let repeatLastN = 64 - let repeatPenalty: Float = 1.1 - let temperature: Float = 0.7 - let seed: Int? = nil - let stop: String? 
= nil - let tfsZ: Float? = nil - let numPredict = 128 - let topK = 40 - let topP: Float = 0.9 - } - let messages: [RoleMessage] - let model: String - let format: String? = nil - let options: OllamaOptions? = nil - let template: String? = nil - let stream = true - let keepAlive = true - - func toJSON() -> String { - let encoder = JSONEncoder() - encoder.keyEncodingStrategy = .convertToSnakeCase - let jsonData = try? encoder.encode(self) - return String(data: jsonData!, encoding: .utf8)! - } - } - - struct Response: Decodable { - struct Choice: Decodable { - let index: Int - let delta: RoleMessage - let finishReason: String? - } - let id: String - let object: String - let created: Int - let model: String - let systemFingerprint: String - let choices: [Choice] - - static func from(data: Data?) throws -> Response? { - guard let data else { return nil } - let decoder = JSONDecoder() - decoder.keyDecodingStrategy = .convertFromSnakeCase - return try decoder.decode(Response.self, from: data) - } - } - - struct ResponseSummary { - var text: String - var responseStartSeconds: Double - var predictedPerSecond: Double? - var modelName: String? - var nPredicted: Int? - } - - private var interrupted = false - private let contextLength: Int - private let baseURL: URL - private let backendType: BackendType - init(backend: BackendType, contextLength: Int, tls: Bool, host: String, port: String) { + init(contextLength: Int, baseURL: URL, apiKey: String?) { self.contextLength = contextLength - self.baseURL = URL(string: "\(tls ? "https" : "http")://\(host):\(port)")! - self.backendType = backend - } - - func complete(messages: [String]) throws -> AsyncStream { - let messages = [RoleMessage(role: "system", content: "you know")] - + messages.map({ RoleMessage(role: "user", content: $0) }) - let params = CompleteParams(messages: messages, model: "orca-mini") - let url = baseURL.appendingPathComponent("/v1/chat/completions") - let request = buildRequest(url: url, params: params) - interrupted = false - - return AsyncStream { continuation in - Task.detached { - let eventSource = EventSource() - let dataTask = eventSource.dataTask(for: request) - - L: for await event in dataTask.events() { - guard await !self.interrupted else { break L } - switch event { - case .open: continue - case .error(let error): - print("ollama EventSource server error:", error.localizedDescription) - break L - case .message(let message): - if let response = try Response.from(data: message.data?.data(using: .utf8)), - let choice = response.choices.first { - continuation.yield(choice.delta.content.removeUnmatchedTrailingQuote()) - if choice.finishReason != nil { break L } - } - case .closed: - print("ollama EventSource closed") - break L - } - } - - continuation.finish() - } - } - } - - func interrupt() { interrupted = true } - - func buildRequest(url: URL, params: CompleteParams, token: String = "none") -> URLRequest { - var request = URLRequest(url: url) - request.httpMethod = "POST" - request.setValue("application/json", forHTTPHeaderField: "Content-Type") - request.setValue("text/event-stream", forHTTPHeaderField: "Accept") - request.setValue("keep-alive", forHTTPHeaderField: "Connection") - request.setValue("Bearer: \(token)", forHTTPHeaderField: "Authorization") - request.httpBody = params.toJSON().data(using: .utf8) - - return request - } - - // MARK: - List models - - struct TagsResponse: Decodable { - struct Model: Decodable { - struct Details: Decodable { - let parentModel: String? 
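// This hunk moves Ollama tag fetching out of the actor and replaces it with
// the static OpenAI model list below. A dynamic alternative would query
// OpenAI's /v1/models endpoint with the stored key -- a sketch, assuming the
// standard `Bearer <token>` authorization header (no colon after "Bearer"):
func fetchOpenAIModels(apiKey: String) async throws -> [String] {
  var request = URLRequest(url: URL(string: "https://api.openai.com/v1/models")!)
  request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
  let (data, _) = try await URLSession.shared.data(for: request)
  struct ModelList: Decodable {
    struct Model: Decodable { let id: String }
    let data: [Model]
  }
  return try JSONDecoder().decode(ModelList.self, from: data).data.map(\.id)
}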
- let format: String - let family: String - let families: [String]? - let parameterSize: String - let quantizationLevel: String - } - let name: String - let model: String - let modifiedAt: String - let size: Int - let digest: String - let details: Details - } - let models: [Model] - - static func from(data: Data) throws -> TagsResponse { - let decoder = JSONDecoder() - decoder.keyDecodingStrategy = .convertFromSnakeCase - return try decoder.decode(TagsResponse.self, from: data) - } - } - - nonisolated func fetchOllamaModels() async throws -> TagsResponse { - let url = baseURL.appendingPathComponent("/api/tags") - let (data, _) = try await URLSession.shared.data(from: url) - return try TagsResponse.from(data: data) + self.baseURL = baseURL + self.apiKey = apiKey + } + + deinit { interrupted = true } + + nonisolated func listModels() -> [String] { + [ + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4-1106-vision-preview", + "gpt-4", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0613", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-instruct", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + "babbage-002", + "davinci-002", + ] } } diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index fef9ae5..14ff12b 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -10,8 +10,6 @@ import MarkdownUI import Foundation struct ConversationView: View, Sendable { - typealias BackendType = OpenAIBackend.BackendType - @Environment(\.managedObjectContext) private var viewContext @EnvironmentObject private var conversationManager: ConversationManager @@ -20,16 +18,11 @@ struct ConversationView: View, Sendable { @AppStorage("systemPrompt") private var systemPrompt: String = DEFAULT_SYSTEM_PROMPT @AppStorage("contextLength") private var contextLength: Int = DEFAULT_CONTEXT_LENGTH @AppStorage("playSoundEffects") private var playSoundEffects = true - @AppStorage("temperature") private var temperature: Double? @AppStorage("useGPU") private var useGPU: Bool = DEFAULT_USE_GPU @AppStorage("serverHost") private var serverHost: String? @AppStorage("serverPort") private var serverPort: String? @AppStorage("serverTLS") private var serverTLS: Bool? - - @FetchRequest( - sortDescriptors: [NSSortDescriptor(keyPath: \Model.size, ascending: true)], - animation: .default) - private var models: FetchedResults + @AppStorage("openAIToken") private var openAIToken: String? private static let SEND = NSDataAsset(name: "ESM_Perfect_App_Button_2_Organic_Simple_Classic_Game_Click") private static let PING = NSDataAsset(name: "ESM_POWER_ON_SYNTH") @@ -44,15 +37,6 @@ struct ConversationView: View, Sendable { conversationManager.agent } - // TODO: Use different lists for the remote backends - var selectedModel: Model? { - if let selectedModelId { - models.first(where: { $0.id?.uuidString == selectedModelId }) - } else { - models.first - } - } - @State var pendingMessage: Message? @State var messages: [Message] = [] @@ -78,8 +62,8 @@ struct ConversationView: View, Sendable { if m == pendingMessage { MessageView(pendingMessage!, overrideText: pendingMessageText, agentStatus: agent.status) .onAppear { - scrollToLastIfRecent(proxy) - } + scrollToLastIfRecent(proxy) + } .opacity(showResponse ? 
1 : 0) .animation(.interpolatingSpring(stiffness: 170, damping: 20), value: showResponse) .id("\(m.id)\(m.updatedAt as Date?)") @@ -107,20 +91,22 @@ struct ConversationView: View, Sendable { autoScroll(proxy) } } + .onReceive(NotificationCenter.default.publisher(for: NSNotification.Name("backendTypeIDDidChange"))) { _ in + initializeBackends() + } } .textSelection(.enabled) .safeAreaInset(edge: .bottom, spacing: 0) { MessageTextField { s in Task { - // TODO: Disable or cancel if busy await submit(s) } } } - .frame(maxWidth: .infinity) - .onAppear { showConversation(conversation) } - .onChange(of: conversation) { nextConvo in showConversation(nextConvo) } - .onChange(of: selectedModelId) { showConversation(conversation, modelId: $0) } + .frame(maxWidth: .infinity) + .onAppear { showConversation(conversation) } + .onChange(of: conversation) { nextConvo in showConversation(nextConvo) } + .onChange(of: selectedModelId) { showConversation(conversation, modelId: $0) } .navigationTitle(conversation.titleWithDefault) .alert(isPresented: $showErrorAlert, error: llamaError) { _ in Button("OK") { @@ -145,27 +131,25 @@ struct ConversationView: View, Sendable { } private func showConversation(_ c: Conversation, modelId: String? = nil) { - guard - let selectedModelId = modelId ?? self.selectedModelId, - !selectedModelId.isEmpty - else { return } - messages = c.orderedMessages - + initializeBackends() + } - // warmup the agent if it's cold or model has changed + private func initializeBackends() { + let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local Task { - let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local if backendType == .local { - await initializeServerLocal(modelId: selectedModelId) + await initializeBackendLocal() } else { - await initializeServerRemote() + await initializeBackendRemote(backend: backendType) } } } - private func initializeServerLocal(modelId: String) async { - guard let id = UUID(uuidString: modelId) else { return } + private func initializeBackendLocal() async { + guard let selectedModelId, !selectedModelId.isEmpty, + let id = UUID(uuidString: selectedModelId) + else { return } let llamaPath = await agent.llama.modelPath let req = Model.fetchRequest() req.predicate = NSPredicate(format: "id == %@", id as CVarArg) @@ -174,18 +158,19 @@ struct ConversationView: View, Sendable { modelPath != llamaPath { await agent.llama.stopServer() agent.llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) - let backendURL = OpenAIBackend.BackendType.local.defaultURL - agent.createBackend(contextLength: contextLength, tls: false, host: backendURL.host()!, port: "\(backendURL.port!)") + + let baseURL = BackendType.local.defaultURL + agent.createBackend(.local, contextLength: contextLength, baseURL: baseURL, apiKey: openAIToken) + } } - private func initializeServerRemote() async { - guard let tls = serverTLS, - let host = serverHost, - let port = serverPort + private func initializeBackendRemote(backend: BackendType) async { + guard let tls = serverTLS, let host = serverHost, let port = serverPort else { return } await agent.llama.stopServer() - agent.createBackend(contextLength: contextLength, tls: tls, host: host, port: port) + let baseURL = URL(string: "\(tls ? "https" : "http")://\(host):\(port)")! 
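// The force-unwrapped URL(string:) above will crash on malformed host or
// port input. A defensive sketch using URLComponents with the same
// tls/host/port settings; callers would skip backend creation on nil:
func remoteBaseURL(tls: Bool, host: String, port: String) -> URL? {
  var components = URLComponents()
  components.scheme = tls ? "https" : "http"
  components.host = host
  components.port = Int(port) // a non-numeric port drops to nil instead of crashing
  return components.url
}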
+ agent.createBackend(backend, contextLength: contextLength, baseURL: baseURL, apiKey: openAIToken) } private func scrollToLastIfRecent(_ proxy: ScrollViewProxy) { @@ -231,9 +216,6 @@ struct ConversationView: View, Sendable { } playSendSound() - - guard let model = selectedModel else { return } - showUserMessage = false engageAutoScroll() @@ -279,9 +261,9 @@ struct ConversationView: View, Sendable { } } - let response: OpenAIBackend.ResponseSummary + let response: CompleteResponseSummary do { - response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, messages: messageTexts, template: model.template, temperature: temperature) + response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, messages: messageTexts) } catch let error as LlamaServerError { handleResponseError(error) return diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift index a1197a8..76d7408 100644 --- a/mac/FreeChat/Views/Settings/AISettingsView.swift +++ b/mac/FreeChat/Views/Settings/AISettingsView.swift @@ -16,6 +16,7 @@ struct AISettingsView: View { @Environment(\.managedObjectContext) private var viewContext @EnvironmentObject var conversationManager: ConversationManager + @available(*, deprecated, message: "use modelList instead") @FetchRequest( sortDescriptors: [NSSortDescriptor(keyPath: \Model.size, ascending: true)], animation: .default) @@ -30,6 +31,7 @@ struct AISettingsView: View { @AppStorage("serverTLS") private var serverTLS: Bool = false @AppStorage("serverHost") private var serverHost: String? @AppStorage("serverPort") private var serverPort: String? + @AppStorage("openAIToken") private var openAIToken: String? @AppStorage("remoteModelTemplate") var remoteModelTemplate: String? @State var pickedModel: String? // Picker selection @@ -37,15 +39,15 @@ struct AISettingsView: View { @State var editSystemPrompt = false @State var editFormat = false @State var revealAdvanced = false - @State var inputServerTLS: Bool = false @State var inputServerHost: String = "" @State var inputServerPort: String = "" @State var serverHealthScore: Double = -1 + @State var modelList: [String] = [] @StateObject var gpu = GPU.shared private var isUsingLocalServer: Bool { - backendTypeID == OpenAIBackend.BackendType.local.rawValue + backendTypeID == BackendType.local.rawValue } let contextLengthFormatter: NumberFormatter = { @@ -61,13 +63,19 @@ struct AISettingsView: View { return formatter }() + + @available(*, deprecated, message: "use selectedModelName instead") var selectedModel: Model? { - if let selectedModelId = self.selectedModelId { + if let selectedModelId { models.first(where: { $0.id?.uuidString == selectedModelId }) } else { models.first } } + + var selectedModelName: String? { + modelList.first + } var systemPromptEditor: some View { VStack { @@ -95,13 +103,25 @@ struct AISettingsView: View { var backendTypePicker: some View { HStack { Picker("Backend", selection: $backendTypeID) { - ForEach(OpenAIBackend.BackendType.allCases, id: \.self) { name in + ForEach(BackendType.allCases, id: \.self) { name in Text(name.rawValue).tag(name.rawValue as String?) 
} } .onAppear { if backendTypeID == nil { - backendTypeID = OpenAIBackend.BackendType.local.rawValue + backendTypeID = BackendType.local.rawValue + } + } + .onChange(of: backendTypeID) { + NotificationCenter.default.post(name: NSNotification.Name("backendTypeIDDidChange"), object: $0) + Task { + do { + try await fetchModels() + // TODO: This is temporary just to list the models + pickedModel = modelList.first + } catch let error { + print("error fetching models:", error) + } } } } @@ -136,19 +156,28 @@ struct AISettingsView: View { var modelPicker: some View { VStack(alignment: .leading) { Picker("Model", selection: $pickedModel) { - ForEach(models) { i in - if let url = i.url { - Text(i.name ?? url.lastPathComponent) - .tag(i.id?.uuidString) - .help(url.path) - } + // TODO: Format the models + ForEach(modelList, id: \.self) { + Text($0) + .tag($0 as String?) + .help($0) } +// ForEach(models) { i in +// if let url = i.url { +// Text(i.name ?? url.lastPathComponent) +// .tag(i.id?.uuidString) +// .help(url.path) +// } +// } if isUsingLocalServer { Divider().tag(nil as String?) Text("Add or Remove Models...").tag(AISettingsView.customizeModelsId as String?) } } + .onAppear { + Task { try? await fetchModels() } + } .onReceive(Just(pickedModel)) { _ in switch pickedModel { case AISettingsView.customizeModelsId: @@ -194,7 +223,7 @@ struct AISettingsView: View { } var hasRemoteServerInputChanged: Bool { - inputServerHost != serverHost || inputServerPort != serverPort || inputServerTLS != serverTLS + inputServerHost != serverHost || inputServerPort != serverPort } var hasRemoteConnectionError: Bool { serverHealthScore < 0.25 && serverHealthScore >= 0 @@ -257,7 +286,7 @@ struct AISettingsView: View { .font(.callout) Spacer() } - Toggle(isOn: $inputServerTLS) { + Toggle(isOn: $serverTLS) { Text("Secure connection (HTTPS)") .font(.callout) } @@ -357,7 +386,6 @@ struct AISettingsView: View { } } pickedModel = selectedModelId - inputServerTLS = serverTLS inputServerHost = serverHost ?? "" inputServerPort = serverPort ?? "" updateRemoteServerURL() @@ -394,7 +422,6 @@ struct AISettingsView: View { } private func saveFormRemoteServer() { - serverTLS = inputServerTLS serverHost = inputServerHost serverPort = inputServerPort serverHealthScore = -1 @@ -402,7 +429,7 @@ struct AISettingsView: View { } private func updateRemoteServerURL() { - let scheme = inputServerTLS ? "https" : "http" + let scheme = serverTLS ? "https" : "http" guard let url = URL(string: "\(scheme)://\(inputServerHost):\(inputServerPort)") else { return } Task { @@ -410,9 +437,37 @@ struct AISettingsView: View { await ServerHealth.shared.check() } } + + // MARK: - Fetch models + + private func fetchModels() async throws { + let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local + let baseURL: URL + if let serverHost, let serverPort { + baseURL = URL(string: "\(serverTLS ? "https" : "http")://\(serverHost):\(serverPort)")! 
+ } else { + baseURL = BackendType.local.defaultURL + } + + switch backendType { + case .local: + let baseURL = BackendType.local.defaultURL + let backend = LocalBackend(contextLength: 0, baseURL: baseURL, apiKey: nil) + modelList = try await backend.listModels() + case .llama: + let backend = LlamaBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken) + modelList = try await backend.listModels() + case .openai: + let backend = OpenAIBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken) + modelList = backend.listModels() + case .ollama: + let backend = OllamaBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken) + modelList = try await backend.listModels() + } + } } #Preview{ - AISettingsView(inputServerTLS: true) + AISettingsView() .environment(\.managedObjectContext, PersistenceController.preview.container.viewContext) } From 43592870c81924bbce3bc638333133c532dc7d80 Mon Sep 17 00:00:00 2001 From: shavit Date: Sat, 9 Mar 2024 11:04:06 -0500 Subject: [PATCH 14/21] Configure all backends Each backend has its own config, model, token, and a default host value. More changes: * Importing a single file will open the app and set the model and backend. * Each backend has its own model list. * Choosing a model will not override other backends. --- .../Mantras.xcdatamodel/contents | 15 +- mac/FreeChat/FreeChatAppDelegate.swift | 14 +- mac/FreeChat/Models/NPC/Agent.swift | 12 +- mac/FreeChat/Models/NPC/Backend.swift | 16 +- mac/FreeChat/Models/NPC/LocalBackend.swift | 2 +- .../ConversationView/ConversationView.swift | 72 +++--- .../Views/Settings/AISettingsView.swift | 226 +++++++++--------- 7 files changed, 184 insertions(+), 173 deletions(-) diff --git a/mac/FreeChat/Chats.xcdatamodeld/Mantras.xcdatamodel/contents b/mac/FreeChat/Chats.xcdatamodeld/Mantras.xcdatamodel/contents index 2b870f4..1066110 100644 --- a/mac/FreeChat/Chats.xcdatamodeld/Mantras.xcdatamodel/contents +++ b/mac/FreeChat/Chats.xcdatamodeld/Mantras.xcdatamodel/contents @@ -1,5 +1,16 @@ - + + + + + + + + + + + + @@ -37,4 +48,4 @@ - \ No newline at end of file + diff --git a/mac/FreeChat/FreeChatAppDelegate.swift b/mac/FreeChat/FreeChatAppDelegate.swift index c63b9f8..8eab344 100644 --- a/mac/FreeChat/FreeChatAppDelegate.swift +++ b/mac/FreeChat/FreeChatAppDelegate.swift @@ -7,21 +7,25 @@ import SwiftUI class FreeChatAppDelegate: NSObject, NSApplicationDelegate, ObservableObject { @AppStorage("selectedModelId") private var selectedModelId: String? 
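// The fetchModels() switch earlier in this patch and Agent.createBackend both
// map a BackendType onto a concrete actor. A small factory sketch that would
// centralize that mapping, assuming only the four conformances added in patch 13:
func makeBackend(_ type: BackendType, contextLength: Int,
                 baseURL: URL, apiKey: String?) -> any Backend {
  switch type {
  case .local:
    return LocalBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey)
  case .llama:
    return LlamaBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey)
  case .openai:
    return OpenAIBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey)
  case .ollama:
    return OllamaBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey)
  }
}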
- + @AppStorage("backendTypeID") private var backendTypeID: String = BackendType.local.rawValue + func application(_ application: NSApplication, open urls: [URL]) { + backendTypeID = BackendType.local.rawValue let viewContext = PersistenceController.shared.container.viewContext do { let req = Model.fetchRequest() req.predicate = NSPredicate(format: "name IN %@", urls.map({ $0.lastPathComponent })) - let existingModels = try viewContext.fetch(req).compactMap({ $0.url }) + let existingModels = try viewContext.fetch(req) for url in urls { - guard !existingModels.contains(url) else { continue } + guard !existingModels.compactMap({ $0.url }).contains(url) else { continue } let insertedModel = try Model.create(context: viewContext, fileURL: url) selectedModelId = insertedModel.id?.uuidString } - - NotificationCenter.default.post(name: NSNotification.Name("selectedModelDidChange"), object: selectedModelId) + + if urls.count == 1 { selectedModelId = existingModels.first(where: { $0.url == urls.first })?.id?.uuidString } + + NotificationCenter.default.post(name: NSNotification.Name("selectedLocalModelDidChange"), object: selectedModelId) NotificationCenter.default.post(name: NSNotification.Name("needStartNewConversation"), object: selectedModelId) } catch { print("error saving model:", error) diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index 844b6c2..191375b 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -30,7 +30,9 @@ class Agent: ObservableObject { llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) } - func createBackend(_ backend: BackendType, contextLength: Int, baseURL: URL, apiKey: String?) { + func createBackend(_ backend: BackendType, contextLength: Int, config: BackendConfig) { + guard let baseURL = config.baseURL, let apiKey = config.apiKey else { return } + switch backend { case .local: self.backend = LocalBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) @@ -46,10 +48,12 @@ class Agent: ObservableObject { // this is the main loop of the agent // listen -> respond -> update mental model and save checkpoint // we respond before updating to avoid a long delay after user input - func listenThinkRespond(speakerId: String, messages: [String]) async throws -> CompleteResponseSummary { + func listenThinkRespond(speakerId: String, messages: [String], model: String) async throws -> CompleteResponseSummary { status = status == .cold ? .coldProcessing : .processing pendingMessage = "" - for try await partialResponse in try await backend.complete(messages: messages) { + let messages = messages.map({ RoleMessage(role: "user", content: $0) }) + let params = CompleteParams(messages: messages, model: model) + for try await partialResponse in try await backend.complete(params: params) { self.pendingMessage += partialResponse self.prompt = pendingMessage } @@ -71,7 +75,7 @@ class Agent: ObservableObject { func warmup() async throws { if prompt.isEmpty, systemPrompt.isEmpty { return } do { - _ = try await backend.complete(messages: []) + _ = try await backend.complete(params: CompleteParams(messages: [], model: "")) status = .ready } catch { status = .cold diff --git a/mac/FreeChat/Models/NPC/Backend.swift b/mac/FreeChat/Models/NPC/Backend.swift index 26067e5..ab35791 100644 --- a/mac/FreeChat/Models/NPC/Backend.swift +++ b/mac/FreeChat/Models/NPC/Backend.swift @@ -12,7 +12,7 @@ protocol Backend: Actor, Sendable { var apiKey: String? 
{ get } var interrupted: Bool { get set } - func complete(messages: [String]) async throws -> AsyncStream + func complete(params: CompleteParams) async throws -> AsyncStream func buildRequest(path: String, params: CompleteParams) -> URLRequest func interrupt() async @@ -20,10 +20,7 @@ protocol Backend: Actor, Sendable { } extension Backend { - func complete(messages: [String]) async throws -> AsyncStream { - let messages = [RoleMessage(role: "system", content: "you know")] - + messages.map({ RoleMessage(role: "user", content: $0) }) - let params = CompleteParams(messages: messages, model: "orca-mini") + func complete(params: CompleteParams) async throws -> AsyncStream { let request = buildRequest(path: "/v1/chat/completions", params: params) self.interrupted = false @@ -36,7 +33,7 @@ extension Backend { switch event { case .open: continue case .error(let error): - print("ollama EventSource server error:", error.localizedDescription) + print("EventSource server error:", error.localizedDescription) break L case .message(let message): if let response = try CompleteResponse.from(data: message.data?.data(using: .utf8)), @@ -44,9 +41,7 @@ extension Backend { continuation.yield(choice.delta.content.removeUnmatchedTrailingQuote()) if choice.finishReason != nil { break L } } - case .closed: - print("ollama EventSource closed") - break L + case .closed: break L } } @@ -58,13 +53,12 @@ extension Backend { func interrupt() async { interrupted = true } func buildRequest(path: String, params: CompleteParams) -> URLRequest { - let apiKey = "" var request = URLRequest(url: baseURL.appendingPathComponent("/v1/chat/completions")) request.httpMethod = "POST" request.setValue("application/json", forHTTPHeaderField: "Content-Type") request.setValue("text/event-stream", forHTTPHeaderField: "Accept") request.setValue("keep-alive", forHTTPHeaderField: "Connection") - request.setValue("Bearer: \(apiKey)", forHTTPHeaderField: "Authorization") + if let apiKey { request.setValue("Bearer: \(apiKey)", forHTTPHeaderField: "Authorization") } request.httpBody = params.toJSON().data(using: .utf8) return request diff --git a/mac/FreeChat/Models/NPC/LocalBackend.swift b/mac/FreeChat/Models/NPC/LocalBackend.swift index f24a42d..4841e49 100644 --- a/mac/FreeChat/Models/NPC/LocalBackend.swift +++ b/mac/FreeChat/Models/NPC/LocalBackend.swift @@ -25,6 +25,6 @@ actor LocalBackend: Backend { let req = Model.fetchRequest() req.sortDescriptors = [NSSortDescriptor(key: "size", ascending: true)] let context = PersistenceController.shared.container.newBackgroundContext() - return try context.fetch(req).compactMap({ $0.url?.path(percentEncoded: false) }) + return try context.fetch(req).compactMap({ $0.url?.lastPathComponent }) } } diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index 14ff12b..a6d28e7 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -19,10 +19,6 @@ struct ConversationView: View, Sendable { @AppStorage("contextLength") private var contextLength: Int = DEFAULT_CONTEXT_LENGTH @AppStorage("playSoundEffects") private var playSoundEffects = true @AppStorage("useGPU") private var useGPU: Bool = DEFAULT_USE_GPU - @AppStorage("serverHost") private var serverHost: String? - @AppStorage("serverPort") private var serverPort: String? - @AppStorage("serverTLS") private var serverTLS: Bool? - @AppStorage("openAIToken") private var openAIToken: String? 
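// A minimal end-to-end sketch of the streaming path after this change: wrap
// the chat history in CompleteParams and consume the AsyncStream returned by
// Backend.complete(params:). The model id here is a placeholder; real values
// come from the per-backend configuration.
func demoCompletion() async throws {
  let backend = OllamaBackend(contextLength: 2048,
                              baseURL: BackendType.ollama.defaultURL,
                              apiKey: nil)
  let params = CompleteParams(
    messages: [RoleMessage(role: "user", content: "Hello")],
    model: "llama2" // hypothetical model id
  )
  for await token in try await backend.complete(params: params) {
    print(token, terminator: "")
  }
}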
private static let SEND = NSDataAsset(name: "ESM_Perfect_App_Button_2_Organic_Simple_Classic_Game_Click") private static let PING = NSDataAsset(name: "ESM_POWER_ON_SYNTH") @@ -107,15 +103,15 @@ struct ConversationView: View, Sendable { .onAppear { showConversation(conversation) } .onChange(of: conversation) { nextConvo in showConversation(nextConvo) } .onChange(of: selectedModelId) { showConversation(conversation, modelId: $0) } - .navigationTitle(conversation.titleWithDefault) - .alert(isPresented: $showErrorAlert, error: llamaError) { _ in - Button("OK") { - llamaError = nil - } - } message: { error in - Text(error.recoverySuggestion ?? "") + .navigationTitle(conversation.titleWithDefault) + .alert(isPresented: $showErrorAlert, error: llamaError) { _ in + Button("OK") { + llamaError = nil } - .background(Color.textBackground) + } message: { error in + Text(error.recoverySuggestion ?? "") + } + .background(Color.textBackground) } private func playSendSound() { @@ -137,40 +133,38 @@ struct ConversationView: View, Sendable { private func initializeBackends() { let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local - Task { - if backendType == .local { - await initializeBackendLocal() - } else { - await initializeBackendRemote(backend: backendType) - } + if backendType == .local { + Task { try? await initializeBackendLocal() } } + + do { + guard let config = try fetchBackendConfig(backendType: backendType, context: viewContext) else { return } + agent.createBackend(backendType, contextLength: contextLength, config: config) + + } catch { print("error fetching backend config", error) } } - private func initializeBackendLocal() async { + private func initializeBackendLocal() async throws { guard let selectedModelId, !selectedModelId.isEmpty, let id = UUID(uuidString: selectedModelId) else { return } + let llamaPath = await agent.llama.modelPath let req = Model.fetchRequest() req.predicate = NSPredicate(format: "id == %@", id as CVarArg) - if let model = try? viewContext.fetch(req).first, + guard let model = try viewContext.fetch(req).first, let modelPath = model.url?.path(percentEncoded: false), - modelPath != llamaPath { - await agent.llama.stopServer() - agent.llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) - - let baseURL = BackendType.local.defaultURL - agent.createBackend(.local, contextLength: contextLength, baseURL: baseURL, apiKey: openAIToken) - - } - } - - private func initializeBackendRemote(backend: BackendType) async { - guard let tls = serverTLS, let host = serverHost, let port = serverPort + modelPath != llamaPath else { return } + await agent.llama.stopServer() - let baseURL = URL(string: "\(tls ? "https" : "http")://\(host):\(port)")! - agent.createBackend(backend, contextLength: contextLength, baseURL: baseURL, apiKey: openAIToken) + agent.llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) + } + + private func fetchBackendConfig(backendType: BackendType, context: NSManagedObjectContext) throws -> BackendConfig? 
{ + let req = BackendConfig.fetchRequest() + req.predicate = NSPredicate(format: "backendType == %@", backendType.rawValue) + return try context.fetch(req).first } private func scrollToLastIfRecent(_ proxy: ScrollViewProxy) { @@ -263,7 +257,8 @@ struct ConversationView: View, Sendable { let response: CompleteResponseSummary do { - response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, messages: messageTexts) + let config = try fetchBackendConfig() + response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, messages: messageTexts, model: config?.model ?? Model.defaultModelUrl.deletingPathExtension().lastPathComponent) } catch let error as LlamaServerError { handleResponseError(error) return @@ -300,6 +295,13 @@ struct ConversationView: View, Sendable { } } } + + private func fetchBackendConfig() throws -> BackendConfig? { + let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local + let req = BackendConfig.fetchRequest() + req.predicate = NSPredicate(format: "backendType == %@", backendType.rawValue) + return try viewContext.fetch(req).first + } } #Preview { diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift index 76d7408..5e53edb 100644 --- a/mac/FreeChat/Views/Settings/AISettingsView.swift +++ b/mac/FreeChat/Views/Settings/AISettingsView.swift @@ -16,21 +16,17 @@ struct AISettingsView: View { @Environment(\.managedObjectContext) private var viewContext @EnvironmentObject var conversationManager: ConversationManager - @available(*, deprecated, message: "use modelList instead") @FetchRequest( sortDescriptors: [NSSortDescriptor(keyPath: \Model.size, ascending: true)], animation: .default) private var models: FetchedResults - @AppStorage("backendTypeID") private var backendTypeID: String? - @AppStorage("selectedModelId") private var selectedModelId: String? + @AppStorage("backendTypeID") private var backendTypeID: String = BackendType.local.rawValue + @AppStorage("selectedModelId") private var selectedModelId: String? // Local only @AppStorage("systemPrompt") private var systemPrompt = DEFAULT_SYSTEM_PROMPT @AppStorage("contextLength") private var contextLength = DEFAULT_CONTEXT_LENGTH @AppStorage("temperature") private var temperature: Double = DEFAULT_TEMP @AppStorage("useGPU") private var useGPU = DEFAULT_USE_GPU - @AppStorage("serverTLS") private var serverTLS: Bool = false - @AppStorage("serverHost") private var serverHost: String? - @AppStorage("serverPort") private var serverPort: String? @AppStorage("openAIToken") private var openAIToken: String? @AppStorage("remoteModelTemplate") var remoteModelTemplate: String? 
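[Editor's note] Both fetchBackendConfig helpers added above rely on the same convention: at most one BackendConfig row exists per backend type, looked up by the enum's raw value. A minimal sketch of that lookup in isolation, assuming the BackendConfig Core Data entity used throughout this series (the fetchLimit line is an editor addition, not in the patch):

    import CoreData

    func backendConfig(for type: BackendType,
                       in context: NSManagedObjectContext) throws -> BackendConfig? {
      let req = BackendConfig.fetchRequest()
      // One config row per backend type, keyed by the enum's raw value.
      req.predicate = NSPredicate(format: "backendType == %@", type.rawValue)
      req.fetchLimit = 1  // only the first match is ever read
      return try context.fetch(req).first
    }

Keying rows this way lets each backend keep its own host, port, API key, and model, so switching backends no longer clobbers the settings of the previous one, unlike the flat @AppStorage fields this series removes.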
@@ -39,16 +35,16 @@ struct AISettingsView: View { @State var editSystemPrompt = false @State var editFormat = false @State var revealAdvanced = false - @State var inputServerHost: String = "" - @State var inputServerPort: String = "" + @State var serverTLS: Bool = false + @State var serverHost: String = "" + @State var serverPort: String = "" + @State var serverAPIKey: String = "" @State var serverHealthScore: Double = -1 @State var modelList: [String] = [] @StateObject var gpu = GPU.shared - private var isUsingLocalServer: Bool { - backendTypeID == BackendType.local.rawValue - } + private var isUsingLocalServer: Bool { backendTypeID == BackendType.local.rawValue } let contextLengthFormatter: NumberFormatter = { let formatter = NumberFormatter() @@ -62,9 +58,7 @@ struct AISettingsView: View { formatter.minimum = 0 return formatter }() - - @available(*, deprecated, message: "use selectedModelName instead") var selectedModel: Model? { if let selectedModelId { models.first(where: { $0.id?.uuidString == selectedModelId }) @@ -73,9 +67,7 @@ struct AISettingsView: View { } } - var selectedModelName: String? { - modelList.first - } + var selectedModelName: String? { modelList.first } var systemPromptEditor: some View { VStack { @@ -104,25 +96,15 @@ struct AISettingsView: View { HStack { Picker("Backend", selection: $backendTypeID) { ForEach(BackendType.allCases, id: \.self) { name in - Text(name.rawValue).tag(name.rawValue as String?) - } - } - .onAppear { - if backendTypeID == nil { - backendTypeID = BackendType.local.rawValue + Text(name.rawValue).tag(name.rawValue) } } .onChange(of: backendTypeID) { - NotificationCenter.default.post(name: NSNotification.Name("backendTypeIDDidChange"), object: $0) Task { - do { - try await fetchModels() - // TODO: This is temporary just to list the models - pickedModel = modelList.first - } catch let error { - print("error fetching models:", error) - } + do { try await loadBackendConfig() } + catch let error { print("error fetching models:", error) } } + NotificationCenter.default.post(name: NSNotification.Name("backendTypeIDDidChange"), object: $0) } } } @@ -145,8 +127,8 @@ struct AISettingsView: View { .offset(x: -4) } .sheet(isPresented: $editFormat) { - if let model = selectedModel { - EditFormat(model: model) + if let model = selectedModelId { + EditFormat(modelName: model) } else if !isUsingLocalServer { EditFormat(modelName: "Remote") } @@ -156,46 +138,37 @@ struct AISettingsView: View { var modelPicker: some View { VStack(alignment: .leading) { Picker("Model", selection: $pickedModel) { - // TODO: Format the models ForEach(modelList, id: \.self) { Text($0) .tag($0 as String?) .help($0) } -// ForEach(models) { i in -// if let url = i.url { -// Text(i.name ?? url.lastPathComponent) -// .tag(i.id?.uuidString) -// .help(url.path) -// } -// } - if isUsingLocalServer { Divider().tag(nil as String?) Text("Add or Remove Models...").tag(AISettingsView.customizeModelsId as String?) } } - .onAppear { - Task { try? 
await fetchModels() } - } .onReceive(Just(pickedModel)) { _ in - switch pickedModel { - case AISettingsView.customizeModelsId: + if pickedModel == AISettingsView.customizeModelsId { customizeModels = true - case .some(let pickedModelValue): - customizeModels = false - selectedModelId = pickedModelValue - default: break } } .onChange(of: pickedModel) { newValue in - switch pickedModel { - case AISettingsView.customizeModelsId: - customizeModels = true - case .some(let pickedModelValue): - customizeModels = false - selectedModelId = pickedModelValue - default: break + guard newValue != AISettingsView.customizeModelsId else { return } + if let backendType: BackendType = BackendType(rawValue: backendTypeID) { + do { + if backendType == .local, + let model = models.filter({ $0.id?.uuidString == newValue }).first { + selectedModelId = model.id?.uuidString + pickedModel = model.name + } + + let config = try findOrCreateBackendConfig(backendType, context: viewContext) + config.backendType = backendType.rawValue + config.model = pickedModel // newValue could be ID + try viewContext.save() + } + catch { print("error saving backend config:", error) } } } @@ -222,9 +195,6 @@ struct AISettingsView: View { } } - var hasRemoteServerInputChanged: Bool { - inputServerHost != serverHost || inputServerPort != serverPort - } var hasRemoteConnectionError: Bool { serverHealthScore < 0.25 && serverHealthScore >= 0 } @@ -278,10 +248,10 @@ struct AISettingsView: View { var sectionRemoteBackend: some View { Group { HStack { - TextField("Server host", text: $inputServerHost, prompt: Text("yourserver.net")) + TextField("Server host", text: $serverHost, prompt: Text("yourserver.net")) .textFieldStyle(.plain) .font(.callout) - TextField("Server port", text: $inputServerPort, prompt: Text("3000")) + TextField("Server port", text: $serverPort, prompt: Text("8690")) .textFieldStyle(.plain) .font(.callout) Spacer() @@ -290,11 +260,15 @@ struct AISettingsView: View { Text("Secure connection (HTTPS)") .font(.callout) } + HStack { + SecureField("API Key", text: $serverAPIKey) + .textFieldStyle(.plain) + .font(.callout) + } HStack { serverHealthIndication Spacer() - Button("Apply", action: saveFormRemoteServer) - .disabled(!hasRemoteServerInputChanged && !hasRemoteConnectionError) + Button("Apply", action: saveFormRemoteBackend) } } } @@ -373,47 +347,30 @@ struct AISettingsView: View { .sheet(isPresented: $editSystemPrompt) { EditSystemPrompt() } - .onSubmit(saveFormRemoteServer) + .onSubmit(saveFormRemoteBackend) .navigationTitle(AISettingsView.title) .onAppear { - if isUsingLocalServer { - let selectedModelExists = - models - .compactMap({ $0.id?.uuidString }) - .contains(selectedModelId) - if !selectedModelExists { - selectedModelId = models.first?.id?.uuidString - } + Task { + do { try await loadBackendConfig() } + catch let error { print("error fetching models:", error) } } - pickedModel = selectedModelId - inputServerHost = serverHost ?? "" - inputServerPort = serverPort ?? "" - updateRemoteServerURL() } - .onChange(of: selectedModelId) { newModelId in - pickedModel = newModelId - guard - let model = models.first(where: { $0.id?.uuidString == newModelId }) ?? 
models.first - else { return } - - conversationManager.rebootAgent( - systemPrompt: self.systemPrompt, model: model, viewContext: viewContext) + .onChange(of: selectedModelId) { _ in + if isUsingLocalServer { rebootAgentWithSelectedModel() } } - .onChange(of: systemPrompt) { nextPrompt in - guard let model: Model = selectedModel else { return } - conversationManager.rebootAgent( - systemPrompt: nextPrompt, model: model, viewContext: viewContext) + .onChange(of: systemPrompt) { _ in + if isUsingLocalServer { rebootAgentWithSelectedModel() } } - .onChange(of: useGPU) { nextUseGPU in - guard let model: Model = selectedModel else { return } - conversationManager.rebootAgent( - systemPrompt: self.systemPrompt, model: model, viewContext: viewContext) + .onChange(of: useGPU) { _ in + if isUsingLocalServer { rebootAgentWithSelectedModel() } } .onReceive( - NotificationCenter.default.publisher(for: NSNotification.Name("selectedModelDidChange")) + NotificationCenter.default.publisher(for: NSNotification.Name("selectedLocalModelDidChange")) ) { output in - if let updatedId: String = output.object as? String { - selectedModelId = updatedId + if let model = models.filter({ $0.id?.uuidString == output.object as? String }).first { + selectedModelId = model.id?.uuidString + pickedModel = model.name + backendTypeID = BackendType.local.rawValue } } .frame( @@ -421,18 +378,22 @@ struct AISettingsView: View { alignment: .center) } - private func saveFormRemoteServer() { - serverHost = inputServerHost - serverPort = inputServerPort + private func saveFormRemoteBackend() { + guard let backendType: BackendType = BackendType(rawValue: backendTypeID), + let config = try? findOrCreateBackendConfig(backendType, context: viewContext), + let url = URL(string: "\(serverTLS && config.baseURL != nil ? "https" : "http")://\(serverHost):\(serverPort)") // Default to TLS disabled + else { return } + serverHealthScore = -1 - updateRemoteServerURL() - } + config.apiKey = serverAPIKey + config.baseURL = url + if modelList.contains(pickedModel ?? "") { config.model = pickedModel } + do { try viewContext.save() } + catch { print("error saving backend", error) } - private func updateRemoteServerURL() { - let scheme = serverTLS ? "https" : "http" - guard let url = URL(string: "\(scheme)://\(inputServerHost):\(inputServerPort)") - else { return } + serverTLS = config.baseURL?.scheme == "https" // Match the UI value Task { + if modelList.isEmpty { try? await fetchModels(backendType: backendType) } await ServerHealth.shared.updateURL(url) await ServerHealth.shared.check() } @@ -440,14 +401,9 @@ struct AISettingsView: View { // MARK: - Fetch models - private func fetchModels() async throws { - let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local - let baseURL: URL - if let serverHost, let serverPort { - baseURL = URL(string: "\(serverTLS ? "https" : "http")://\(serverHost):\(serverPort)")! - } else { - baseURL = BackendType.local.defaultURL - } + private func fetchModels(backendType: BackendType) async throws { + let baseURL = URL(string: "\(serverTLS ? "https" : "http")://\(serverHost):\(serverPort)") ?? 
backendType.defaultURL + modelList.removeAll() switch backendType { case .local: @@ -455,8 +411,7 @@ struct AISettingsView: View { let backend = LocalBackend(contextLength: 0, baseURL: baseURL, apiKey: nil) modelList = try await backend.listModels() case .llama: - let backend = LlamaBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken) - modelList = try await backend.listModels() + modelList = ["Unavailable"] case .openai: let backend = OpenAIBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken) modelList = backend.listModels() @@ -464,6 +419,47 @@ struct AISettingsView: View { let backend = OllamaBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken) modelList = try await backend.listModels() } + + if !modelList.contains(pickedModel ?? "") { pickedModel = modelList.first } + } + + private func rebootAgentWithSelectedModel() { + guard let selectedModelId else { return } + let req = Model.fetchRequest() + req.predicate = NSPredicate(format: "id == %@", selectedModelId) + do { + if let model = try viewContext.fetch(req).first { + conversationManager.rebootAgent(systemPrompt: self.systemPrompt, model: model, viewContext: viewContext) + } + } catch { print("error fetching model id:", selectedModelId, error) } + } + + // MARK: - Backend config + + private func loadBackendConfig() async throws { + let backendType: BackendType = BackendType(rawValue: backendTypeID) ?? .local + let config = try findOrCreateBackendConfig(backendType, context: viewContext) + if backendType == .local, + let model = models.filter({ $0.id?.uuidString == selectedModelId }).first { + config.model = model.name + } + + if config.baseURL == nil { config.baseURL = backendType.defaultURL } + serverTLS = config.baseURL?.scheme == "https" ? true : false + serverHost = config.baseURL?.host() ?? "" + serverPort = "\(config.baseURL?.port ?? 8690)" + serverAPIKey = config.apiKey ?? "" + + try await fetchModels(backendType: backendType) + config.model = config.model ?? modelList.first + pickedModel = config.model + try viewContext.save() + } + + private func findOrCreateBackendConfig(_ backendType: BackendType, context: NSManagedObjectContext) throws -> BackendConfig { + let req = BackendConfig.fetchRequest() + req.predicate = NSPredicate(format: "backendType == %@", backendType.rawValue) + return try context.fetch(req).first ?? 
BackendConfig(context: context) } } From c972fb6eca71583c9330ef78ac147da10d3fc3dc Mon Sep 17 00:00:00 2001 From: shavit Date: Sat, 9 Mar 2024 15:00:15 -0500 Subject: [PATCH 15/21] Update model file picker and completion parameters * Update model list when file is added or deleted * Update completion params * Match the picker selection to a model file --- mac/FreeChat/Models/NPC/Agent.swift | 6 +-- mac/FreeChat/Models/NPC/Backend.swift | 53 +++++++++---------- .../ConversationView/ConversationView.swift | 7 ++- .../Views/Settings/AISettingsView.swift | 22 +++++--- 4 files changed, 49 insertions(+), 39 deletions(-) diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index 191375b..60ceb56 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -48,11 +48,9 @@ class Agent: ObservableObject { // this is the main loop of the agent // listen -> respond -> update mental model and save checkpoint // we respond before updating to avoid a long delay after user input - func listenThinkRespond(speakerId: String, messages: [String], model: String) async throws -> CompleteResponseSummary { + func listenThinkRespond(speakerId: String, params: CompleteParams) async throws -> CompleteResponseSummary { status = status == .cold ? .coldProcessing : .processing pendingMessage = "" - let messages = messages.map({ RoleMessage(role: "user", content: $0) }) - let params = CompleteParams(messages: messages, model: model) for try await partialResponse in try await backend.complete(params: params) { self.pendingMessage += partialResponse self.prompt = pendingMessage @@ -75,7 +73,7 @@ class Agent: ObservableObject { func warmup() async throws { if prompt.isEmpty, systemPrompt.isEmpty { return } do { - _ = try await backend.complete(params: CompleteParams(messages: [], model: "")) + _ = try await backend.complete(params: CompleteParams(messages: [], model: "", temperature: 0.7)) status = .ready } catch { status = .cold diff --git a/mac/FreeChat/Models/NPC/Backend.swift b/mac/FreeChat/Models/NPC/Backend.swift index ab35791..c23cb01 100644 --- a/mac/FreeChat/Models/NPC/Backend.swift +++ b/mac/FreeChat/Models/NPC/Backend.swift @@ -53,12 +53,12 @@ extension Backend { func interrupt() async { interrupted = true } func buildRequest(path: String, params: CompleteParams) -> URLRequest { - var request = URLRequest(url: baseURL.appendingPathComponent("/v1/chat/completions")) + var request = URLRequest(url: baseURL.appendingPathComponent(path)) request.httpMethod = "POST" request.setValue("application/json", forHTTPHeaderField: "Content-Type") request.setValue("text/event-stream", forHTTPHeaderField: "Accept") request.setValue("keep-alive", forHTTPHeaderField: "Connection") - if let apiKey { request.setValue("Bearer: \(apiKey)", forHTTPHeaderField: "Authorization") } + request.setValue("Bearer: \(apiKey ?? "none")", forHTTPHeaderField: "Authorization") request.httpBody = params.toJSON().data(using: .utf8) return request @@ -82,39 +82,36 @@ enum BackendType: String, CaseIterable { } struct RoleMessage: Codable { - let role: String + let role: String? let content: String } struct CompleteParams: Encodable { - struct OllamaOptions: Encodable { - enum Mirostat: Int, Encodable { - case disabled = 0 - case v1 = 1 - case v2 = 2 - } - let mirostat: Mirostat - let mirostatETA: Float = 0.1 - let mirostatTAU: Float = 5 - let numCTX = 2048 - let numGQA = 1 - let numGPU: Int? = nil - let numThread: Int? 
= nil - let repeatLastN = 64 - let repeatPenalty: Float = 1.1 - let temperature: Float = 0.7 - let seed: Int? = nil - let stop: String? = nil - let tfsZ: Float? = nil - let numPredict = 128 - let topK = 40 - let topP: Float = 0.9 + enum Mirostat: Int, Encodable { + case disabled = 0 + case v1 = 1 + case v2 = 2 } let messages: [RoleMessage] let model: String - let format: String? = nil - let options: OllamaOptions? = nil + let mirostat: Mirostat = .disabled + let mirostatETA: Float = 0.1 + let mirostatTAU: Float = 5 + let numCTX = 2048 + let numGQA = 1 + let numGPU: Int? = nil + let numThread: Int? = nil + let repeatLastN = 64 + let repeatPenalty: Float = 1.1 + let temperature: Float // 0.7 + let seed: Int? = nil + let stop: [String]? = nil + let tfsZ: Float? = nil + let numPredict = 128 + let topK = 40 + let topP: Float = 0.9 let template: String? = nil + let cachePrompt = true let stream = true let keepAlive = true @@ -136,7 +133,7 @@ struct CompleteResponse: Decodable { let object: String let created: Int let model: String - let systemFingerprint: String + let systemFingerprint: String? let choices: [Choice] static func from(data: Data?) throws -> CompleteResponse? { diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index a6d28e7..e01045c 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -19,6 +19,7 @@ struct ConversationView: View, Sendable { @AppStorage("contextLength") private var contextLength: Int = DEFAULT_CONTEXT_LENGTH @AppStorage("playSoundEffects") private var playSoundEffects = true @AppStorage("useGPU") private var useGPU: Bool = DEFAULT_USE_GPU + @AppStorage("temperature") private var temperature: Double = DEFAULT_TEMP private static let SEND = NSDataAsset(name: "ESM_Perfect_App_Button_2_Organic_Simple_Classic_Game_Click") private static let PING = NSDataAsset(name: "ESM_POWER_ON_SYNTH") @@ -258,7 +259,11 @@ struct ConversationView: View, Sendable { let response: CompleteResponseSummary do { let config = try fetchBackendConfig() - response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, messages: messageTexts, model: config?.model ?? Model.defaultModelUrl.deletingPathExtension().lastPathComponent) + let messages = messages .compactMap({ $0.text }).map({ RoleMessage(role: "user", content: $0) }) + let params = CompleteParams(messages: messages, + model: config?.model ?? Model.defaultModelUrl.deletingPathExtension().lastPathComponent, + temperature: Float(temperature)) + response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, params: params) } catch let error as LlamaServerError { handleResponseError(error) return diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift index 5e53edb..44f63b1 100644 --- a/mac/FreeChat/Views/Settings/AISettingsView.swift +++ b/mac/FreeChat/Views/Settings/AISettingsView.swift @@ -341,7 +341,7 @@ struct AISettingsView: View { } } .formStyle(.grouped) - .sheet(isPresented: $customizeModels, onDismiss: { pickedModel = selectedModelId }) { + .sheet(isPresented: $customizeModels, onDismiss: { setPickedModelFromID(modelID: selectedModelId) }) { EditModels(selectedModelId: $selectedModelId) } .sheet(isPresented: $editSystemPrompt) { @@ -356,6 +356,10 @@ struct AISettingsView: View { } } .onChange(of: selectedModelId) { _ in + Task { + try? 
await fetchModels(backendType: .local) + setPickedModelFromID(modelID: selectedModelId) + } if isUsingLocalServer { rebootAgentWithSelectedModel() } } .onChange(of: systemPrompt) { _ in @@ -367,11 +371,7 @@ struct AISettingsView: View { .onReceive( NotificationCenter.default.publisher(for: NSNotification.Name("selectedLocalModelDidChange")) ) { output in - if let model = models.filter({ $0.id?.uuidString == output.object as? String }).first { - selectedModelId = model.id?.uuidString - pickedModel = model.name - backendTypeID = BackendType.local.rawValue - } + setPickedModelFromID(modelID: output.object as? String) } .frame( minWidth: 300, maxWidth: 600, minHeight: 184, idealHeight: 195, maxHeight: 400, @@ -434,6 +434,14 @@ struct AISettingsView: View { } catch { print("error fetching model id:", selectedModelId, error) } } + private func setPickedModelFromID(modelID: String?) { + guard let model = models.filter({ $0.id?.uuidString == modelID }).first + else { return } + selectedModelId = model.id?.uuidString + pickedModel = model.name + backendTypeID = BackendType.local.rawValue + } + // MARK: - Backend config private func loadBackendConfig() async throws { @@ -454,6 +462,8 @@ struct AISettingsView: View { config.model = config.model ?? modelList.first pickedModel = config.model try viewContext.save() + + await ServerHealth.shared.updateURL(config.baseURL) } private func findOrCreateBackendConfig(_ backendType: BackendType, context: NSManagedObjectContext) throws -> BackendConfig { From dd30ed445d50178ea9ebd6ed6fcd890dd202f9bb Mon Sep 17 00:00:00 2001 From: shavit Date: Sun, 10 Mar 2024 15:05:04 -0400 Subject: [PATCH 16/21] Make content optional in completion * Update the backend response * Select and use imported model file --- mac/FreeChat/FreeChatAppDelegate.swift | 2 +- mac/FreeChat/Models/NPC/Backend.swift | 4 ++-- mac/FreeChat/Models/NPC/String+TrimQuotes.swift | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mac/FreeChat/FreeChatAppDelegate.swift b/mac/FreeChat/FreeChatAppDelegate.swift index 8eab344..718fcc8 100644 --- a/mac/FreeChat/FreeChatAppDelegate.swift +++ b/mac/FreeChat/FreeChatAppDelegate.swift @@ -23,7 +23,7 @@ class FreeChatAppDelegate: NSObject, NSApplicationDelegate, ObservableObject { selectedModelId = insertedModel.id?.uuidString } - if urls.count == 1 { selectedModelId = existingModels.first(where: { $0.url == urls.first })?.id?.uuidString } + if urls.count == 1, let modelID = existingModels.first(where: { $0.url == urls.first })?.id?.uuidString { selectedModelId = modelID } NotificationCenter.default.post(name: NSNotification.Name("selectedLocalModelDidChange"), object: selectedModelId) NotificationCenter.default.post(name: NSNotification.Name("needStartNewConversation"), object: selectedModelId) diff --git a/mac/FreeChat/Models/NPC/Backend.swift b/mac/FreeChat/Models/NPC/Backend.swift index c23cb01..f111823 100644 --- a/mac/FreeChat/Models/NPC/Backend.swift +++ b/mac/FreeChat/Models/NPC/Backend.swift @@ -38,7 +38,7 @@ extension Backend { case .message(let message): if let response = try CompleteResponse.from(data: message.data?.data(using: .utf8)), let choice = response.choices.first { - continuation.yield(choice.delta.content.removeUnmatchedTrailingQuote()) + if let content = choice.delta.content?.trimTrailingQuote() { continuation.yield(content) } if choice.finishReason != nil { break L } } case .closed: break L @@ -83,7 +83,7 @@ enum BackendType: String, CaseIterable { struct RoleMessage: Codable { let role: String? 
- let content: String + let content: String? } struct CompleteParams: Encodable { diff --git a/mac/FreeChat/Models/NPC/String+TrimQuotes.swift b/mac/FreeChat/Models/NPC/String+TrimQuotes.swift index c71844c..cc91420 100644 --- a/mac/FreeChat/Models/NPC/String+TrimQuotes.swift +++ b/mac/FreeChat/Models/NPC/String+TrimQuotes.swift @@ -6,7 +6,7 @@ import Foundation extension String { - func removeUnmatchedTrailingQuote() -> String { + func trimTrailingQuote() -> String { guard self.last == "\"" else { return self } // Count the number of quotes in the string From e40daddedb112a4ef3c5871630cc6c0b6e6874d2 Mon Sep 17 00:00:00 2001 From: shavit Date: Tue, 12 Mar 2024 15:02:14 -0400 Subject: [PATCH 17/21] Update backend initialization * Remove context from backends * Provide a fallback baseURL to new backends * Pass config to create backends and add another fallback to ensure initialization --- mac/FreeChat/Models/NPC/Agent.swift | 12 ++++++------ mac/FreeChat/Models/NPC/Backend.swift | 2 +- mac/FreeChat/Models/NPC/LlamaBackend.swift | 5 +---- mac/FreeChat/Models/NPC/LlamaServer.swift | 3 +-- mac/FreeChat/Models/NPC/LocalBackend.swift | 5 +---- mac/FreeChat/Models/NPC/OllamaBackend.swift | 5 +---- mac/FreeChat/Models/NPC/OpenAIBackend.swift | 7 ++----- .../Views/ConversationView/ConversationView.swift | 7 +++---- mac/FreeChat/Views/Settings/AISettingsView.swift | 8 ++++---- 9 files changed, 20 insertions(+), 34 deletions(-) diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index 60ceb56..d99a252 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -31,17 +31,17 @@ class Agent: ObservableObject { } func createBackend(_ backend: BackendType, contextLength: Int, config: BackendConfig) { - guard let baseURL = config.baseURL, let apiKey = config.apiKey else { return } + let baseURL = config.baseURL ?? backend.defaultURL // Prevent backend crash; should have value. switch backend { case .local: - self.backend = LocalBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) + self.backend = LocalBackend(baseURL: baseURL, apiKey: config.apiKey) case .llama: - self.backend = LlamaBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) + self.backend = LlamaBackend(baseURL: baseURL, apiKey: config.apiKey) case .openai: - self.backend = OpenAIBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) + self.backend = OpenAIBackend(baseURL: baseURL, apiKey: config.apiKey) case .ollama: - self.backend = OllamaBackend(contextLength: contextLength, baseURL: baseURL, apiKey: apiKey) + self.backend = OllamaBackend(baseURL: baseURL, apiKey: config.apiKey) } } @@ -73,7 +73,7 @@ class Agent: ObservableObject { func warmup() async throws { if prompt.isEmpty, systemPrompt.isEmpty { return } do { - _ = try await backend.complete(params: CompleteParams(messages: [], model: "", temperature: 0.7)) + _ = try await backend.complete(params: CompleteParams(messages: [], model: "", numCTX: 2048, temperature: 0.7)) status = .ready } catch { status = .cold diff --git a/mac/FreeChat/Models/NPC/Backend.swift b/mac/FreeChat/Models/NPC/Backend.swift index f111823..d38c8f9 100644 --- a/mac/FreeChat/Models/NPC/Backend.swift +++ b/mac/FreeChat/Models/NPC/Backend.swift @@ -97,7 +97,7 @@ struct CompleteParams: Encodable { let mirostat: Mirostat = .disabled let mirostatETA: Float = 0.1 let mirostatTAU: Float = 5 - let numCTX = 2048 + let numCTX: Int // 2048 let numGQA = 1 let numGPU: Int? = nil let numThread: Int? 
= nil diff --git a/mac/FreeChat/Models/NPC/LlamaBackend.swift b/mac/FreeChat/Models/NPC/LlamaBackend.swift index b732e4b..13df7b8 100644 --- a/mac/FreeChat/Models/NPC/LlamaBackend.swift +++ b/mac/FreeChat/Models/NPC/LlamaBackend.swift @@ -10,11 +10,8 @@ actor LlamaBackend: Backend { var baseURL: URL var apiKey: String? var interrupted = false - - private let contextLength: Int - init(contextLength: Int, baseURL: URL, apiKey: String?) { - self.contextLength = contextLength + init(baseURL: URL, apiKey: String?) { self.baseURL = baseURL self.apiKey = apiKey } diff --git a/mac/FreeChat/Models/NPC/LlamaServer.swift b/mac/FreeChat/Models/NPC/LlamaServer.swift index 67d1369..f41b15c 100644 --- a/mac/FreeChat/Models/NPC/LlamaServer.swift +++ b/mac/FreeChat/Models/NPC/LlamaServer.swift @@ -13,14 +13,13 @@ actor LlamaServer { private var process = Process() private var serverUp = false private var serverErrorMessage = "" - private let url: URL + private let url = URL(string: "http://127.0.0.1:8690")! private var monitor = Process() init(modelPath: String, contextLength: Int) { self.modelPath = modelPath self.contextLength = contextLength - self.url = URL(string: "http://127.0.0.1:8690")! } // Start a monitor process that will terminate the server when our app dies. diff --git a/mac/FreeChat/Models/NPC/LocalBackend.swift b/mac/FreeChat/Models/NPC/LocalBackend.swift index 4841e49..7298c60 100644 --- a/mac/FreeChat/Models/NPC/LocalBackend.swift +++ b/mac/FreeChat/Models/NPC/LocalBackend.swift @@ -11,10 +11,7 @@ actor LocalBackend: Backend { var apiKey: String? var interrupted = false - private let contextLength: Int - - init(contextLength: Int, baseURL: URL, apiKey: String?) { - self.contextLength = contextLength + init(baseURL: URL, apiKey: String?) { self.baseURL = baseURL self.apiKey = apiKey } diff --git a/mac/FreeChat/Models/NPC/OllamaBackend.swift b/mac/FreeChat/Models/NPC/OllamaBackend.swift index 42a3ba7..43e4fa8 100644 --- a/mac/FreeChat/Models/NPC/OllamaBackend.swift +++ b/mac/FreeChat/Models/NPC/OllamaBackend.swift @@ -11,10 +11,7 @@ actor OllamaBackend: Backend { var apiKey: String? var interrupted = false - private let contextLength: Int - - init(contextLength: Int, baseURL: URL, apiKey: String?) { - self.contextLength = contextLength + init(baseURL: URL, apiKey: String?) { self.baseURL = baseURL self.apiKey = apiKey } diff --git a/mac/FreeChat/Models/NPC/OpenAIBackend.swift b/mac/FreeChat/Models/NPC/OpenAIBackend.swift index dcebc1d..b73d99d 100644 --- a/mac/FreeChat/Models/NPC/OpenAIBackend.swift +++ b/mac/FreeChat/Models/NPC/OpenAIBackend.swift @@ -9,12 +9,9 @@ actor OpenAIBackend: Backend { var type: BackendType = .openai let baseURL: URL let apiKey: String? - var interrupted: Bool = false + var interrupted = false - private let contextLength: Int - - init(contextLength: Int, baseURL: URL, apiKey: String?) { - self.contextLength = contextLength + init(baseURL: URL, apiKey: String?) { self.baseURL = baseURL self.apiKey = apiKey } diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index e01045c..467c3f9 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -139,7 +139,7 @@ struct ConversationView: View, Sendable { } do { - guard let config = try fetchBackendConfig(backendType: backendType, context: viewContext) else { return } + let config = try fetchBackendConfig(backendType: backendType, context: viewContext) ?? 
BackendConfig(context: viewContext)
       agent.createBackend(backendType, contextLength: contextLength, config: config)
 
     } catch { print("error fetching backend config", error) }
@@ -228,8 +228,6 @@ struct ConversationView: View, Sendable {
       showUserMessage = true
     }
 
-    let messageTexts = messages.map { $0.text ?? "" }
-
     // Pending message for bot's reply
     let m = Message(context: viewContext)
     m.fromId = agent.id
@@ -259,9 +257,10 @@ struct ConversationView: View, Sendable {
     let response: CompleteResponseSummary
     do {
       let config = try fetchBackendConfig()
-      let messages = messages .compactMap({ $0.text }).map({ RoleMessage(role: "user", content: $0) })
+      let messages = messages.compactMap({ $0.text }).map({ RoleMessage(role: "user", content: $0) })
       let params = CompleteParams(messages: messages,
                                   model: config?.model ?? Model.defaultModelUrl.deletingPathExtension().lastPathComponent,
+                                  numCTX: contextLength,
                                   temperature: Float(temperature))
       response = try await agent.listenThinkRespond(speakerId: Message.USER_SPEAKER_ID, params: params)
diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift
index 44f63b1..a75bc70 100644
--- a/mac/FreeChat/Views/Settings/AISettingsView.swift
+++ b/mac/FreeChat/Views/Settings/AISettingsView.swift
@@ -289,7 +289,7 @@ struct AISettingsView: View {
       content: {
         VStack(alignment: .leading) {
           HStack {
-            Text("Configure llama.cpp based on the model you're using.")
+            Text("Configure your backend based on the model you're using.")
               .foregroundColor(Color(NSColor.secondaryLabelColor))
             Button("Restore defaults") {
               contextLength = DEFAULT_CONTEXT_LENGTH
@@ -408,15 +408,15 @@ struct AISettingsView: View {
     switch backendType {
     case .local:
       let baseURL = BackendType.local.defaultURL
-      let backend = LocalBackend(contextLength: 0, baseURL: baseURL, apiKey: nil)
+      let backend = LocalBackend(baseURL: baseURL, apiKey: nil)
       modelList = try await backend.listModels()
     case .llama:
-      let backend = LlamaBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken)
-      modelList = try await backend.listModels()
+      modelList = ["Unavailable"]
     case .openai:
-      let backend = OpenAIBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken)
+      let backend = OpenAIBackend(baseURL: baseURL, apiKey: nil)
       modelList = backend.listModels()
     case .ollama:
-      let backend = OllamaBackend(contextLength: 0, baseURL: baseURL, apiKey: openAIToken)
+      let backend = OllamaBackend(baseURL: baseURL, apiKey: nil)
       modelList = try await backend.listModels()
     }
 
From c8c0ba2c6bf3391fa35dc4d224fa849bf8319ae5 Mon Sep 17 00:00:00 2001
From: shavit
Date: Tue, 12 Mar 2024 16:22:20 -0400
Subject: [PATCH 18/21] Disable remote llama.cpp model selection and add
 backend descriptions

---
 mac/FreeChat/Models/NPC/Backend.swift            | 9 +++++++++
 mac/FreeChat/Views/Settings/AISettingsView.swift | 1 +
 2 files changed, 10 insertions(+)

diff --git a/mac/FreeChat/Models/NPC/Backend.swift b/mac/FreeChat/Models/NPC/Backend.swift
index d38c8f9..efd7266 100644
--- a/mac/FreeChat/Models/NPC/Backend.swift
+++ b/mac/FreeChat/Models/NPC/Backend.swift
@@ -79,6 +79,15 @@ enum BackendType: String, CaseIterable {
     case .openai: return URL(string: "https://api.openai.com")!
     }
   }
+
+  var howtoConfigure: String {
+    switch self {
+    case .local: NSLocalizedString("Runs on this computer using llama.cpp. No configuration required", comment: "No configuration")
+    case .llama: NSLocalizedString("Llama.cpp is an efficient server that runs more than just LLaMa models. [Learn more](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md)", comment: "What it is and Usage link")
+    case .openai: NSLocalizedString("Configure OpenAI's ChatGPT. [Learn more](https://openai.com/product)", comment: "What it is and Usage link")
+    case .ollama: NSLocalizedString("Ollama runs large language models locally. [Learn more](https://ollama.com)", comment: "What it is and Usage link")
+    }
+  }
 }
 
 struct RoleMessage: Codable {
diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift
index a75bc70..73bb746 100644
--- a/mac/FreeChat/Views/Settings/AISettingsView.swift
+++ b/mac/FreeChat/Views/Settings/AISettingsView.swift
@@ -148,6 +148,7 @@ struct AISettingsView: View {
         Text("Add or Remove Models...").tag(AISettingsView.customizeModelsId as String?)
       }
     }
+    .disabled(backendTypeID == BackendType.llama.rawValue)
     .onReceive(Just(pickedModel)) { _ in
       if pickedModel == AISettingsView.customizeModelsId {
         customizeModels = true

From ec4a8b2c388a2328185b7506b6f0b60e71455e83 Mon Sep 17 00:00:00 2001
From: shavit
Date: Tue, 12 Mar 2024 16:27:00 -0400
Subject: [PATCH 19/21] Use localized formatted strings for help

* Use localized strings with markdown.
* Add system prompt to completion.
* Add a default port 443 for OpenAI to ensure a port value in settings.
* Determine default value for `selectedModelId`.
---
 mac/FreeChat/Models/NPC/Backend.swift            | 12 ++--
 .../ConversationView/ConversationView.swift      |  3 +-
 .../Views/Settings/AISettingsView.swift          | 68 +++++++------------
 3 files changed, 33 insertions(+), 50 deletions(-)

diff --git a/mac/FreeChat/Models/NPC/Backend.swift b/mac/FreeChat/Models/NPC/Backend.swift
index efd7266..eb4ad97 100644
--- a/mac/FreeChat/Models/NPC/Backend.swift
+++ b/mac/FreeChat/Models/NPC/Backend.swift
@@ -76,16 +76,16 @@ enum BackendType: String, CaseIterable {
     case .local: return URL(string: "http://127.0.0.1:8690")!
     case .llama: return URL(string: "http://127.0.0.1:8690")!
     case .ollama: return URL(string: "http://127.0.0.1:11434")!
-    case .openai: return URL(string: "https://api.openai.com")!
+    case .openai: return URL(string: "https://api.openai.com:443")!
     }
   }
 
-  var howtoConfigure: String {
+  var howtoConfigure: AttributedString {
     switch self {
-    case .local: NSLocalizedString("Runs on this computer using llama.cpp. No configuration required", comment: "No configuration")
-    case .llama: NSLocalizedString("Llama.cpp is an efficient server that runs more than just LLaMa models. [Learn more](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md)", comment: "What it is and Usage link")
-    case .openai: NSLocalizedString("Configure OpenAI's ChatGPT. [Learn more](https://openai.com/product)", comment: "What it is and Usage link")
-    case .ollama: NSLocalizedString("Ollama runs large language models locally. [Learn more](https://ollama.com)", comment: "What it is and Usage link")
+    case .local: try! AttributedString(markdown: NSLocalizedString("Runs on this computer offline using llama.cpp. No configuration required", comment: "No configuration"))
+    case .llama: try! AttributedString(markdown: NSLocalizedString("Llama.cpp is an efficient server that runs more than just LLaMa models. [Learn more](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md)", comment: "What it is and Usage link"))
+    case .openai: try! AttributedString(markdown: NSLocalizedString("Configure OpenAI's ChatGPT. 
[Learn more](https://openai.com/product)", comment: "What it is and Usage link")) + case .ollama: try! AttributedString(markdown: NSLocalizedString("Ollama runs large language models locally. [Learn more](https://ollama.com)", comment: "What it is and Usage link")) } } } diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index 467c3f9..b083090 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -257,7 +257,8 @@ struct ConversationView: View, Sendable { let response: CompleteResponseSummary do { let config = try fetchBackendConfig() - let messages = messages.compactMap({ $0.text }).map({ RoleMessage(role: "user", content: $0) }) + let messages = [RoleMessage(role: "system", content: systemPrompt)] + + messages.compactMap({ $0.text }).map({ RoleMessage(role: "user", content: $0) }) let params = CompleteParams(messages: messages, model: config?.model ?? Model.defaultModelUrl.deletingPathExtension().lastPathComponent, numCTX: contextLength, diff --git a/mac/FreeChat/Views/Settings/AISettingsView.swift b/mac/FreeChat/Views/Settings/AISettingsView.swift index 73bb746..5d061f4 100644 --- a/mac/FreeChat/Views/Settings/AISettingsView.swift +++ b/mac/FreeChat/Views/Settings/AISettingsView.swift @@ -28,7 +28,6 @@ struct AISettingsView: View { @AppStorage("temperature") private var temperature: Double = DEFAULT_TEMP @AppStorage("useGPU") private var useGPU = DEFAULT_USE_GPU @AppStorage("openAIToken") private var openAIToken: String? - @AppStorage("remoteModelTemplate") var remoteModelTemplate: String? @State var pickedModel: String? // Picker selection @State var customizeModels = false // Show add remove models @@ -93,7 +92,7 @@ struct AISettingsView: View { } var backendTypePicker: some View { - HStack { + VStack(alignment: .leading) { Picker("Backend", selection: $backendTypeID) { ForEach(BackendType.allCases, id: \.self) { name in Text(name.rawValue).tag(name.rawValue) @@ -106,20 +105,21 @@ struct AISettingsView: View { } NotificationCenter.default.post(name: NSNotification.Name("backendTypeIDDidChange"), object: $0) } + Text(BackendType(rawValue: backendTypeID)?.howtoConfigure ?? "") + .font(.callout) + .foregroundColor(Color(NSColor.secondaryLabelColor)) + .lineLimit(5) + .fixedSize(horizontal: false, vertical: true) + .padding(.top, 0.5) } } + @available(*, deprecated, message: "template is not supported") var editPromptFormat: some View { HStack { - if let model = selectedModel { - Text("Prompt format: \(model.template.format.rawValue)") - .foregroundColor(Color(NSColor.secondaryLabelColor)) - .font(.caption) - } else if !isUsingLocalServer { - Text("Prompt format: \(remoteModelTemplate ?? TemplateFormat.vicuna.rawValue)") - .foregroundColor(Color(NSColor.secondaryLabelColor)) - .font(.caption) - } + Text("Prompt format \(selectedModel?.template.format.rawValue ?? "")") + .foregroundColor(Color(NSColor.secondaryLabelColor)) + .font(.caption) Button("Edit") { editFormat = true } @@ -148,7 +148,7 @@ struct AISettingsView: View { Text("Add or Remove Models...").tag(AISettingsView.customizeModelsId as String?) 
} } - .disabled(backendTypeID == BackendType.llama.rawValue) + .disabled(backendTypeID == BackendType.llama.rawValue || modelList.isEmpty) .onReceive(Just(pickedModel)) { _ in if pickedModel == AISettingsView.customizeModelsId { customizeModels = true @@ -182,17 +182,7 @@ struct AISettingsView: View { .lineLimit(5) .fixedSize(horizontal: false, vertical: true) .padding(.top, 0.5) - } else { - Text( - "If you have access to a powerful server, you may want to run your model there. Enter the host and port to connect to a remote llama.cpp server. Instructions for running the server can be found [here](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md)" - ) - .font(.callout) - .foregroundColor(Color(NSColor.secondaryLabelColor)) - .lineLimit(5) - .fixedSize(horizontal: false, vertical: true) - .padding(.top, 0.5) } - editPromptFormat } } @@ -202,18 +192,12 @@ struct AISettingsView: View { var indicatorColor: Color { switch serverHealthScore { - case 0..<0.25: - Color(red: 1, green: 0, blue: 0) - case 0.25..<0.5: - Color(red: 1, green: 0.5, blue: 0) - case 0.5..<0.75: - Color(red: 0.45, green: 0.55, blue: 0) - case 0.75..<0.95: - Color(red: 0.1, green: 0.9, blue: 0) - case 0.95...1: - Color(red: 0, green: 1, blue: 0) - default: - Color(red: 0.5, green: 0.5, blue: 0.5) + case 0..<0.25: Color(red: 1, green: 0, blue: 0) + case 0.25..<0.5: Color(red: 1, green: 0.5, blue: 0) + case 0.5..<0.75: Color(red: 0.45, green: 0.55, blue: 0) + case 0.75..<0.95: Color(red: 0.1, green: 0.9, blue: 0) + case 0.95...1: Color(red: 0, green: 1, blue: 0) + default: Color(red: 0.5, green: 0.5, blue: 0.5) } } @@ -225,12 +209,9 @@ struct AISettingsView: View { .foregroundColor(indicatorColor) Group { switch serverHealthScore { - case 0.25...1: - Text("Connected") - case 0..<0.25: - Text("Connection Error. Retrying...") - default: - Text("Not Connected") + case 0.25...1: Text("Connected") + case 0..<0.25: Text("Connection Error. Retrying...") + default: Text("Not Connected") } } .font(.callout) @@ -448,9 +429,10 @@ struct AISettingsView: View { private func loadBackendConfig() async throws { let backendType: BackendType = BackendType(rawValue: backendTypeID) ?? .local let config = try findOrCreateBackendConfig(backendType, context: viewContext) - if backendType == .local, - let model = models.filter({ $0.id?.uuidString == selectedModelId }).first { - config.model = model.name + if backendType == .local { + let model = models.first(where: { $0.id?.uuidString == selectedModelId }) ?? models.first + config.model = model?.name + selectedModelId = model?.id?.uuidString } if config.baseURL == nil { config.baseURL = backendType.defaultURL } From d71f565cb1029fa9b9b22b1b44719be220d85e8b Mon Sep 17 00:00:00 2001 From: shavit Date: Sat, 16 Mar 2024 12:05:04 -0400 Subject: [PATCH 20/21] Create backends each time the agent reboots Fetch the backend config and create the backend on each reboot. 
--- mac/FreeChat/Models/ConversationManager.swift | 22 ++++++++++++++----- mac/FreeChat/Models/NPC/Agent.swift | 2 +- .../ConversationView/ConversationView.swift | 14 ++++-------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/mac/FreeChat/Models/ConversationManager.swift b/mac/FreeChat/Models/ConversationManager.swift index 4bb6f8b..33b3e35 100644 --- a/mac/FreeChat/Models/ConversationManager.swift +++ b/mac/FreeChat/Models/ConversationManager.swift @@ -15,6 +15,7 @@ class ConversationManager: ObservableObject { var summonRegistered = false + @AppStorage("backendTypeID") private var backendTypeID: String? @AppStorage("systemPrompt") private var systemPrompt: String = DEFAULT_SYSTEM_PROMPT @AppStorage("contextLength") private var contextLength: Int = DEFAULT_CONTEXT_LENGTH @@ -72,10 +73,8 @@ class ConversationManager: ObservableObject { @MainActor func rebootAgent(systemPrompt: String? = nil, model: Model, viewContext: NSManagedObjectContext) { + guard let url = model.url else { return } let systemPrompt = systemPrompt ?? self.systemPrompt - guard let url = model.url else { - return - } Task { await agent.llama.stopServer() @@ -83,12 +82,25 @@ class ConversationManager: ObservableObject { let messages = currentConversation.orderedMessages.map { $0.text ?? "" } let convoPrompt = model.template.run(systemPrompt: systemPrompt, messages: messages) agent = Agent(id: "Llama", prompt: convoPrompt, systemPrompt: systemPrompt, modelPath: url.path, contextLength: contextLength) - loadingModelId = model.id?.uuidString - model.error = nil + do { + let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local + let context = PersistenceController.shared.container.newBackgroundContext() + let config = try fetchBackendConfig(context: context) ?? BackendConfig(context: context) + agent.createBackend(backendType, contextLength: contextLength, config: config) + } catch { print("error fetching backend config", error) } + loadingModelId = model.id?.uuidString + model.error = nil loadingModelId = nil try? viewContext.save() } } + + private func fetchBackendConfig(context: NSManagedObjectContext) throws -> BackendConfig? { + let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local + let req = BackendConfig.fetchRequest() + req.predicate = NSPredicate(format: "backendType == %@", backendType.rawValue) + return try context.fetch(req).first + } } diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index d99a252..6d74bc3 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -31,7 +31,7 @@ class Agent: ObservableObject { } func createBackend(_ backend: BackendType, contextLength: Int, config: BackendConfig) { - let baseURL = config.baseURL ?? backend.defaultURL // Prevent backend crash; should have value. + let baseURL = config.baseURL ?? backend.defaultURL switch backend { case .local: diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index b083090..850c6e6 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -139,7 +139,7 @@ struct ConversationView: View, Sendable { } do { - let config = try fetchBackendConfig(backendType: backendType, context: viewContext) ?? BackendConfig(context: viewContext) + let config = try fetchBackendConfig(context: viewContext) ?? 
BackendConfig(context: viewContext) agent.createBackend(backendType, contextLength: contextLength, config: config) } catch { print("error fetching backend config", error) } @@ -162,12 +162,6 @@ struct ConversationView: View, Sendable { agent.llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) } - private func fetchBackendConfig(backendType: BackendType, context: NSManagedObjectContext) throws -> BackendConfig? { - let req = BackendConfig.fetchRequest() - req.predicate = NSPredicate(format: "backendType == %@", backendType.rawValue) - return try context.fetch(req).first - } - private func scrollToLastIfRecent(_ proxy: ScrollViewProxy) { let fiveSecondsAgo = Date() - TimeInterval(5) // 5 seconds ago let last = messages.last @@ -256,7 +250,7 @@ struct ConversationView: View, Sendable { let response: CompleteResponseSummary do { - let config = try fetchBackendConfig() + let config = try fetchBackendConfig(context: viewContext) let messages = [RoleMessage(role: "system", content: systemPrompt)] + messages.compactMap({ $0.text }).map({ RoleMessage(role: "user", content: $0) }) let params = CompleteParams(messages: messages, @@ -301,11 +295,11 @@ struct ConversationView: View, Sendable { } } - private func fetchBackendConfig() throws -> BackendConfig? { + private func fetchBackendConfig(context: NSManagedObjectContext) throws -> BackendConfig? { let backendType: BackendType = BackendType(rawValue: backendTypeID ?? "") ?? .local let req = BackendConfig.fetchRequest() req.predicate = NSPredicate(format: "backendType == %@", backendType.rawValue) - return try viewContext.fetch(req).first + return try context.fetch(req).first } } From 28799950286bfa43fba0db3875849f0b8aa9afb2 Mon Sep 17 00:00:00 2001 From: shavit Date: Mon, 25 Mar 2024 15:20:04 -0400 Subject: [PATCH 21/21] Initialize agent backend and start server * Create a backend during agent initialization. * Start the local llama server in conversation view --- mac/FreeChat/Models/NPC/Agent.swift | 7 ++++--- mac/FreeChat/Models/NPC/LlamaServer.swift | 2 +- mac/FreeChat/Views/ConversationView/ConversationView.swift | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mac/FreeChat/Models/NPC/Agent.swift b/mac/FreeChat/Models/NPC/Agent.swift index 6d74bc3..f87dbfc 100644 --- a/mac/FreeChat/Models/NPC/Agent.swift +++ b/mac/FreeChat/Models/NPC/Agent.swift @@ -21,13 +21,14 @@ class Agent: ObservableObject { // each agent runs their own server var llama: LlamaServer - private var backend: Backend! 
+ private var backend: Backend init(id: String, prompt: String, systemPrompt: String, modelPath: String, contextLength: Int) { self.id = id self.prompt = prompt self.systemPrompt = systemPrompt - llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) + self.llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) + self.backend = LocalBackend(baseURL: BackendType.local.defaultURL, apiKey: nil) } func createBackend(_ backend: BackendType, contextLength: Int, config: BackendConfig) { @@ -67,7 +68,7 @@ class Agent: ObservableObject { func interrupt() async { if status != .processing, status != .coldProcessing { return } - await backend?.interrupt() + await backend.interrupt() } func warmup() async throws { diff --git a/mac/FreeChat/Models/NPC/LlamaServer.swift b/mac/FreeChat/Models/NPC/LlamaServer.swift index f41b15c..e2bef47 100644 --- a/mac/FreeChat/Models/NPC/LlamaServer.swift +++ b/mac/FreeChat/Models/NPC/LlamaServer.swift @@ -58,7 +58,7 @@ actor LlamaServer { print("started monitor for \(serverPID)") } - private func startServer() async throws { + func startServer() async throws { guard !process.isRunning, let modelPath = self.modelPath else { return } stopServer() process = Process() diff --git a/mac/FreeChat/Views/ConversationView/ConversationView.swift b/mac/FreeChat/Views/ConversationView/ConversationView.swift index 850c6e6..5520ac1 100644 --- a/mac/FreeChat/Views/ConversationView/ConversationView.swift +++ b/mac/FreeChat/Views/ConversationView/ConversationView.swift @@ -160,6 +160,7 @@ struct ConversationView: View, Sendable { await agent.llama.stopServer() agent.llama = LlamaServer(modelPath: modelPath, contextLength: contextLength) + try await agent.llama.startServer() } private func scrollToLastIfRecent(_ proxy: ScrollViewProxy) {
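[Editor's note] After this final patch the agent always owns a backend (a LocalBackend pointing at BackendType.local.defaultURL until createBackend swaps in the configured one) and the local server is started explicitly from the conversation view. A closing sketch of the completion path the series converges on; the model name and user text are placeholders, everything else follows the diffs:

    let params = CompleteParams(
      messages: [RoleMessage(role: "system", content: systemPrompt),
                 RoleMessage(role: "user", content: "Hello")],
      model: "llama-2-7b-chat",  // placeholder; normally BackendConfig.model
      numCTX: contextLength,
      temperature: 0.7
    )
    // complete(params:) posts to /v1/chat/completions and yields tokens
    // through an AsyncStream, so partial output is appended as it arrives.
    for await chunk in try await backend.complete(params: params) {
      pendingMessage += chunk
    }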