diff --git a/AGENTS.md b/AGENTS.md index 8fec5e8..0e5fe40 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -244,7 +244,7 @@ enable_proxy_agent: false # Enable remote proxy agent enable_deep_research: true # Enable deep research service # Deep research configuration -deep_research_image: "ghcr.io/eternisai/deep_research:sha-ff37ec2" +deep_research_image: "ghcr.io/eternisai/deep_research:sha-XXXXXXX" # See manager.go for current default deep_research_port: 3031 search_provider: "perplexity" perplexity_api_key: "" # Required for deep research web search diff --git a/go.mod b/go.mod index f8827c5..b51c1f6 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/eternisai/silo -go 1.25.0 +go 1.24.0 require ( github.com/fatih/color v1.16.0 diff --git a/internal/cli/completion.go b/internal/cli/completion.go index 01ea4e0..d5519a2 100644 --- a/internal/cli/completion.go +++ b/internal/cli/completion.go @@ -45,13 +45,13 @@ PowerShell: Run: func(cmd *cobra.Command, args []string) { switch args[0] { case "bash": - rootCmd.GenBashCompletion(os.Stdout) + _ = rootCmd.GenBashCompletion(os.Stdout) case "zsh": - rootCmd.GenZshCompletion(os.Stdout) + _ = rootCmd.GenZshCompletion(os.Stdout) case "fish": - rootCmd.GenFishCompletion(os.Stdout, true) + _ = rootCmd.GenFishCompletion(os.Stdout, true) case "powershell": - rootCmd.GenPowerShellCompletionWithDesc(os.Stdout) + _ = rootCmd.GenPowerShellCompletionWithDesc(os.Stdout) } }, } diff --git a/internal/daemon/handlers.go b/internal/daemon/handlers.go index 7aed4a7..cfbd25c 100644 --- a/internal/daemon/handlers.go +++ b/internal/daemon/handlers.go @@ -266,7 +266,7 @@ func (s *Server) handleLogs(w http.ResponseWriter, r *http.Request) { } w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(APIResponse{ + _ = json.NewEncoder(w).Encode(APIResponse{ Success: true, Message: "Logs command executed (output sent to stdout)", Data: map[string]interface{}{"note": "Logs are output to stdout, not captured in API response"}, @@ -313,7 +313,7 @@ func (s *Server) handleVersion(w http.ResponseWriter, r *http.Request) { } w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(APIResponse{ + _ = json.NewEncoder(w).Encode(APIResponse{ Success: true, Message: "Version information retrieved", Data: data, @@ -522,7 +522,7 @@ func (s *Server) handleInferenceLogs(w http.ResponseWriter, r *http.Request) { func (s *Server) respondError(w http.ResponseWriter, status int, error, details string) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(status) - json.NewEncoder(w).Encode(APIResponse{ + _ = json.NewEncoder(w).Encode(APIResponse{ Success: false, Error: error, Details: details, @@ -533,7 +533,7 @@ func (s *Server) respondError(w http.ResponseWriter, status int, error, details func (s *Server) respondWithLogs(w http.ResponseWriter, status int, success bool, message, error, details string, logs []LogEntry) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(status) - json.NewEncoder(w).Encode(APIResponse{ + _ = json.NewEncoder(w).Encode(APIResponse{ Success: success, Message: message, Error: error, diff --git a/internal/daemon/server.go b/internal/daemon/server.go index e51696f..0f124b2 100644 --- a/internal/daemon/server.go +++ b/internal/daemon/server.go @@ -142,7 +142,7 @@ func (s *Server) loggingMiddleware(next http.Handler) http.Handler { // handleHealth returns basic health status func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{ + _ = json.NewEncoder(w).Encode(map[string]string{ "status": "ok", }) } @@ -157,5 +157,5 @@ func (s *Server) handleStatus(w http.ResponseWriter, r *http.Request) { } w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(status) + _ = json.NewEncoder(w).Encode(status) } diff --git a/internal/docker/compose.go b/internal/docker/compose.go index 70b49a8..35c6b9a 100644 --- a/internal/docker/compose.go +++ b/internal/docker/compose.go @@ -54,11 +54,30 @@ func Down(ctx context.Context, composePath string, removeVolumes bool) error { return nil } -func Pull(ctx context.Context, composePath string) error { +// PullResult contains the result of pulling a service image +type PullResult struct { + Service string + Error error +} + +// Pull pulls images for the given services, or default services if none specified. +// Returns results for each service attempted. +func Pull(ctx context.Context, composePath string, services ...string) []PullResult { + if len(services) == 0 { + services = []string{"backend", "frontend"} + } + + var results []PullResult + for _, service := range services { + err := pullService(ctx, composePath, service) + results = append(results, PullResult{Service: service, Error: err}) + } + return results +} + +func pullService(ctx context.Context, composePath string, service string) error { composeCmd := GetComposeCommand() - services := []string{"backend", "frontend"} - args := append(composeCmd[1:], "-f", composePath, "pull") - args = append(args, services...) + args := append(composeCmd[1:], "-f", composePath, "pull", service) cmd := exec.CommandContext(ctx, composeCmd[0], args...) cmd.Stdout = os.Stdout @@ -66,7 +85,7 @@ func Pull(ctx context.Context, composePath string) error { cmd.Dir = filepath.Dir(composePath) if err := cmd.Run(); err != nil { - return fmt.Errorf("failed to pull images: %w", err) + return fmt.Errorf("failed to pull %s: %w", service, err) } return nil } diff --git a/internal/inference/inference.go b/internal/inference/inference.go index d905f6b..e1bcb91 100644 --- a/internal/inference/inference.go +++ b/internal/inference/inference.go @@ -197,14 +197,6 @@ func (e *Engine) getContainerName() string { return DefaultContainerName } -// getImage returns the image from config or default -func (e *Engine) getImage() string { - if e.cfg.SGLang.Image != "" { - return e.cfg.SGLang.Image - } - return DefaultImage -} - // buildDockerRunArgs builds the docker run command arguments func (e *Engine) buildDockerRunArgs() []string { return []string{ diff --git a/internal/inference/inference_test.go b/internal/inference/inference_test.go index 1980939..9b0473b 100644 --- a/internal/inference/inference_test.go +++ b/internal/inference/inference_test.go @@ -19,7 +19,7 @@ func TestBuildDockerRunArgs(t *testing.T) { // Expected command (with $HOME expanded) homeDir := os.Getenv("HOME") - expected := `docker run -d --name glm_model --restart unless-stopped --gpus "device=0,1,2" --shm-size 64g --ipc=host --ulimit memlock=-1:-1 --ulimit nofile=1048576:1048576 -p 30000:30000 -e CUDA_VISIBLE_DEVICES=0,1,2 -e PYTORCH_ALLOC_CONF=expandable_segments:True -v /root/data/AWQ:/workspace/model -v ` + homeDir + `/.cache/huggingface:/root/.cache/huggingface lmsysorg/sglang:latest python3 -m sglang.launch_server --model-path /workspace/model --host 0.0.0.0 --port 30000 --dp-size 3 --tp-size 1 --max-running-requests 32 --max-total-tokens 262144 --context-length 131072 --mem-fraction-static 0.88 --chunked-prefill-size 8192 --schedule-policy fcfs --kv-cache-dtype fp8_e4m3 --attention-backend flashinfer --disable-radix-cache --reasoning-parser glm45 --tool-call-parser glm --trust-remote-code --log-level info` + expected := `docker run -d --name glm_model --restart unless-stopped --gpus "device=0,1,2" --shm-size 64g --ipc=host --ulimit memlock=-1:-1 --ulimit nofile=1048576:1048576 -p 30000:30000 -e CUDA_VISIBLE_DEVICES=0,1,2 -e PYTORCH_ALLOC_CONF=expandable_segments:True -v /root/data/AWQ:/workspace/model -v ` + homeDir + `/.cache/huggingface:/root/.cache/huggingface lmsysorg/sglang:latest python3 -m sglang.launch_server --model-path /workspace/model --host 0.0.0.0 --port 30000 --dp-size 3 --tp-size 1 --max-running-requests 32 --max-total-tokens 262144 --context-length 131072 --mem-fraction-static 0.88 --chunked-prefill-size -1 --schedule-policy fcfs --kv-cache-dtype fp8_e4m3 --attention-backend flashinfer --disable-radix-cache --reasoning-parser glm45 --tool-call-parser glm --trust-remote-code --log-level info` if cmd != expected { t.Errorf("Docker command mismatch.\n\nGot:\n%s\n\nExpected:\n%s", cmd, expected) @@ -60,7 +60,7 @@ func TestBuildDockerRunArgs_SGLangFlags(t *testing.T) { "--max-total-tokens 262144", "--context-length 131072", "--mem-fraction-static 0.88", - "--chunked-prefill-size 8192", + "--chunked-prefill-size -1", "--schedule-policy fcfs", "--kv-cache-dtype fp8_e4m3", "--attention-backend flashinfer", diff --git a/internal/installer/installer.go b/internal/installer/installer.go index 60d2bae..b6e6c64 100644 --- a/internal/installer/installer.go +++ b/internal/installer/installer.go @@ -122,11 +122,36 @@ func (i *Installer) generateConfigs() error { func (i *Installer) pullImages(ctx context.Context) error { i.logger.Info("Pulling Docker images...") - if err := docker.Pull(ctx, i.paths.ComposeFile); err != nil { - return err + // Determine which services to pull + services := []string{"backend", "frontend"} + if i.config.EnableDeepResearch { + services = append(services, "deep-research") + } + + // Pull each service, tracking failures + results := docker.Pull(ctx, i.paths.ComposeFile, services...) + + var failed []string + for _, r := range results { + if r.Error != nil { + i.logger.Warn("Failed to pull %s: %v", r.Service, r.Error) + failed = append(failed, r.Service) + } else { + i.logger.Success("Pulled %s", r.Service) + } + } + + // Fail only if critical services (backend/frontend) failed + for _, f := range failed { + if f == "backend" || f == "frontend" { + return fmt.Errorf("failed to pull critical service: %s", f) + } + } + + if len(failed) > 0 { + i.logger.Warn("Some non-critical images failed to pull, continuing anyway") } - i.logger.Success("Docker images pulled") return nil } diff --git a/internal/updater/updater.go b/internal/updater/updater.go index 07d0752..4404341 100644 --- a/internal/updater/updater.go +++ b/internal/updater/updater.go @@ -131,11 +131,36 @@ func (u *Updater) updateConfigWithLatestVersion(ctx context.Context) (string, er func (u *Updater) pullImages(ctx context.Context) error { u.logger.Info("Pulling latest Docker images...") - if err := docker.Pull(ctx, u.paths.ComposeFile); err != nil { - return err + // Determine which services to pull + services := []string{"backend", "frontend"} + if u.config.EnableDeepResearch { + services = append(services, "deep-research") + } + + // Pull each service, tracking failures + results := docker.Pull(ctx, u.paths.ComposeFile, services...) + + var failed []string + for _, r := range results { + if r.Error != nil { + u.logger.Warn("Failed to pull %s: %v", r.Service, r.Error) + failed = append(failed, r.Service) + } else { + u.logger.Success("Pulled %s", r.Service) + } + } + + // Fail only if critical services (backend/frontend) failed + for _, f := range failed { + if f == "backend" || f == "frontend" { + return fmt.Errorf("failed to pull critical service: %s", f) + } + } + + if len(failed) > 0 { + u.logger.Warn("Some non-critical images failed to pull, continuing anyway") } - u.logger.Success("Docker images pulled") return nil } diff --git a/internal/version/version.go b/internal/version/version.go index 8d639bc..9615f74 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -180,10 +180,10 @@ func compareSemanticVersions(v1, v2 string) int { for i := 0; i < maxLen; i++ { var n1, n2 int if i < len(parts1) { - fmt.Sscanf(parts1[i], "%d", &n1) + _, _ = fmt.Sscanf(parts1[i], "%d", &n1) } if i < len(parts2) { - fmt.Sscanf(parts2[i], "%d", &n2) + _, _ = fmt.Sscanf(parts2[i], "%d", &n2) } if n1 > n2 { return 1 diff --git a/pkg/logger/logger.go b/pkg/logger/logger.go index a18a708..ab63f08 100644 --- a/pkg/logger/logger.go +++ b/pkg/logger/logger.go @@ -32,7 +32,7 @@ func (l *Logger) Success(msg string, args ...interface{}) { return } green := color.New(color.FgGreen).SprintfFunc() - fmt.Fprintf(os.Stdout, green("✓ "+msg+"\n", args...)) + fmt.Fprint(os.Stdout, green("✓ "+msg+"\n", args...)) } func (l *Logger) Warn(msg string, args ...interface{}) { @@ -40,7 +40,7 @@ func (l *Logger) Warn(msg string, args ...interface{}) { return } yellow := color.New(color.FgYellow).SprintfFunc() - fmt.Fprintf(os.Stderr, yellow("⚠ "+msg+"\n", args...)) + fmt.Fprint(os.Stderr, yellow("⚠ "+msg+"\n", args...)) } func (l *Logger) Error(msg string, args ...interface{}) { @@ -48,7 +48,7 @@ func (l *Logger) Error(msg string, args ...interface{}) { return } red := color.New(color.FgRed).SprintfFunc() - fmt.Fprintf(os.Stderr, red("✗ "+msg+"\n", args...)) + fmt.Fprint(os.Stderr, red("✗ "+msg+"\n", args...)) } func (l *Logger) Debug(msg string, args ...interface{}) { @@ -57,6 +57,6 @@ func (l *Logger) Debug(msg string, args ...interface{}) { } if l.verbose { cyan := color.New(color.FgCyan).SprintfFunc() - fmt.Fprintf(os.Stdout, cyan("[DEBUG] "+msg+"\n", args...)) + fmt.Fprint(os.Stdout, cyan("[DEBUG] "+msg+"\n", args...)) } }