diff --git a/pkg/debugcmd/deployment_get.go b/pkg/debugcmd/deployment_get.go new file mode 100644 index 00000000..f76a274b --- /dev/null +++ b/pkg/debugcmd/deployment_get.go @@ -0,0 +1,73 @@ +package debugcmd + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +type DeploymentGetRequest struct { + TwinID uint32 `json:"twin_id"` + ContractID uint64 `json:"contract_id"` + WithHistory bool `json:"withhistory"` +} + +type WorkloadTransaction struct { + Seq int `json:"seq"` + Type string `json:"type"` + Name string `json:"name"` + Created gridtypes.Timestamp `json:"created"` + State gridtypes.ResultState `json:"state"` + Message string `json:"message"` +} + +type DeploymentGetResponse struct { + Deployment gridtypes.Deployment `json:"deployment"` + History []WorkloadTransaction `json:"history,omitempty"` +} + +func ParseDeploymentGetRequest(payload []byte) (DeploymentGetRequest, error) { + var req DeploymentGetRequest + if err := json.Unmarshal(payload, &req); err != nil { + return req, err + } + return req, nil +} + +func DeploymentGet(ctx context.Context, deps Deps, req DeploymentGetRequest) (DeploymentGetResponse, error) { + if req.TwinID == 0 { + return DeploymentGetResponse{}, fmt.Errorf("twin_id is required") + } + if req.ContractID == 0 { + return DeploymentGetResponse{}, fmt.Errorf("contract_id is required") + } + + deployment, err := deps.Provision.Get(ctx, req.TwinID, req.ContractID) + if err != nil { + return DeploymentGetResponse{}, err + } + if !req.WithHistory { + return DeploymentGetResponse{Deployment: deployment}, nil + } + + history, err := deps.Provision.Changes(ctx, req.TwinID, req.ContractID) + if err != nil { + return DeploymentGetResponse{}, err + } + + transactions := make([]WorkloadTransaction, 0, len(history)) + for idx, wl := range history { + transactions = append(transactions, WorkloadTransaction{ + Seq: idx + 1, + Type: string(wl.Type), + Name: string(wl.Name), + Created: 
wl.Result.Created, + State: wl.Result.State, + Message: wl.Result.Error, + }) + } + + return DeploymentGetResponse{Deployment: deployment, History: transactions}, nil +} diff --git a/pkg/debugcmd/deployments_list.go b/pkg/debugcmd/deployments_list.go new file mode 100644 index 00000000..b1e9b5be --- /dev/null +++ b/pkg/debugcmd/deployments_list.go @@ -0,0 +1,72 @@ +package debugcmd + +import ( + "context" + "encoding/json" +) + +type DeploymentsListRequest struct { + TwinID uint32 `json:"twin_id"` +} + +type DeploymentsListWorkload struct { + Type string `json:"type"` + Name string `json:"name"` + State string `json:"state"` +} + +type DeploymentsListItem struct { + TwinID uint32 `json:"twin_id"` + ContractID uint64 `json:"contract_id"` + Workloads []DeploymentsListWorkload `json:"workloads"` +} + +type DeploymentsListResponse struct { + Items []DeploymentsListItem `json:"items"` +} + +func ParseDeploymentsListRequest(payload []byte) (DeploymentsListRequest, error) { + var req DeploymentsListRequest + if len(payload) == 0 { + return req, nil + } + // optional payload + _ = json.Unmarshal(payload, &req) + return req, nil +} + +func DeploymentsList(ctx context.Context, deps Deps, req DeploymentsListRequest) (DeploymentsListResponse, error) { + twins := []uint32{req.TwinID} + if req.TwinID == 0 { + var err error + twins, err = deps.Provision.ListTwins(ctx) + if err != nil { + return DeploymentsListResponse{}, err + } + } + + items := make([]DeploymentsListItem, 0) + for _, twin := range twins { + deployments, err := deps.Provision.List(ctx, twin) + if err != nil { + return DeploymentsListResponse{}, err + } + for _, d := range deployments { + workloads := make([]DeploymentsListWorkload, 0, len(d.Workloads)) + for _, wl := range d.Workloads { + workloads = append(workloads, DeploymentsListWorkload{ + Type: string(wl.Type), + Name: string(wl.Name), + State: string(wl.Result.State), + }) + } + items = append(items, DeploymentsListItem{ + TwinID: d.TwinID, + ContractID: 
d.ContractID, + Workloads: workloads, + }) + } + } + + return DeploymentsListResponse{Items: items}, nil +} diff --git a/pkg/debugcmd/deps.go b/pkg/debugcmd/deps.go new file mode 100644 index 00000000..e1d8f5fe --- /dev/null +++ b/pkg/debugcmd/deps.go @@ -0,0 +1,36 @@ +package debugcmd + +import ( + "context" + + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" +) + +// Provision is the subset of the provision zbus interface used by debug commands. +type Provision interface { + ListTwins(ctx context.Context) ([]uint32, error) + List(ctx context.Context, twin uint32) ([]gridtypes.Deployment, error) + Get(ctx context.Context, twin uint32, contract uint64) (gridtypes.Deployment, error) + Changes(ctx context.Context, twin uint32, contract uint64) ([]gridtypes.Workload, error) +} + +// VM is the subset of the vmd zbus interface used by debug commands. +type VM interface { + Exists(ctx context.Context, id string) bool + Inspect(ctx context.Context, id string) (pkg.VMInfo, error) + Logs(ctx context.Context, id string) (string, error) + LogsFull(ctx context.Context, id string) (string, error) +} + +// Network is the subset of the network zbus interface used by debug commands. 
+type Network interface { + Namespace(ctx context.Context, id zos.NetID) string +} + +type Deps struct { + Provision Provision + VM VM + Network Network +} diff --git a/pkg/debugcmd/provisioning_health.go b/pkg/debugcmd/provisioning_health.go new file mode 100644 index 00000000..f3d3a0a2 --- /dev/null +++ b/pkg/debugcmd/provisioning_health.go @@ -0,0 +1,392 @@ +package debugcmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + cnins "github.com/containernetworking/plugins/pkg/ns" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/network/namespace" + "github.com/threefoldtech/zosbase/pkg/network/nr" + "github.com/threefoldtech/zosbase/pkg/versioned" + "github.com/threefoldtech/zosbase/pkg/vm" + "github.com/threefoldtech/zosbase/pkg/zinit" + "github.com/vishvananda/netlink" +) + +type ProvisioningHealthRequest struct { + TwinID uint32 `json:"twin_id"` + ContractID uint64 `json:"contract_id"` +} + +type HealthStatus string + +const ( + HealthHealthy HealthStatus = "healthy" + HealthDegraded HealthStatus = "degraded" + HealthUnhealthy HealthStatus = "unhealthy" +) + +type HealthCheck struct { + Name string `json:"name"` + OK bool `json:"ok"` + Message string `json:"message,omitempty"` + Evidence map[string]interface{} `json:"evidence,omitempty"` +} + +type WorkloadHealth struct { + WorkloadID string `json:"workload_id"` + Type string `json:"type"` + Name string `json:"name"` + Status HealthStatus `json:"status"` + Checks []HealthCheck `json:"checks"` +} + +type ProvisioningHealthResponse struct { + TwinID uint32 `json:"twin_id"` + ContractID uint64 `json:"contract_id"` + Workloads []WorkloadHealth `json:"workloads"` +} + +func ParseProvisioningHealthRequest(payload []byte) (ProvisioningHealthRequest, error) { + var req ProvisioningHealthRequest + if err := json.Unmarshal(payload, &req); err != 
nil { + return req, err + } + return req, nil +} + +func ProvisioningHealth(ctx context.Context, deps Deps, req ProvisioningHealthRequest) (ProvisioningHealthResponse, error) { + if req.TwinID == 0 { + return ProvisioningHealthResponse{}, fmt.Errorf("twin_id is required") + } + if req.ContractID == 0 { + return ProvisioningHealthResponse{}, fmt.Errorf("contract_id is required") + } + + deployment, err := deps.Provision.Get(ctx, req.TwinID, req.ContractID) + if err != nil { + return ProvisioningHealthResponse{}, fmt.Errorf("failed to get deployment: %w", err) + } + + out := ProvisioningHealthResponse{TwinID: req.TwinID, ContractID: req.ContractID} + for _, wl := range deployment.Workloads { + switch wl.Type { + case zos.NetworkType: + out.Workloads = append(out.Workloads, checkNetworkWorkload(ctx, deps, req.TwinID, req.ContractID, wl)) + case zos.ZMachineType, zos.ZMachineLightType: + out.Workloads = append(out.Workloads, checkZMachineWorkload(ctx, deps, req.TwinID, req.ContractID, wl)) + default: + } + } + + return out, nil +} + +type checkBuilder struct { + checks []HealthCheck +} + +func (b *checkBuilder) add(name string, ok bool, msg string, evidence map[string]interface{}) { + b.checks = append(b.checks, HealthCheck{Name: name, OK: ok, Message: msg, Evidence: evidence}) +} + +func (b *checkBuilder) status() HealthStatus { + fail := 0 + for _, c := range b.checks { + if !c.OK { + fail++ + } + } + if fail == 0 { + return HealthHealthy + } + if fail == 1 { + return HealthDegraded + } + return HealthUnhealthy +} + +func checkNetworkWorkload(ctx context.Context, deps Deps, twin uint32, contract uint64, wl gridtypes.Workload) WorkloadHealth { + const ( + networkdVolatileDir = "/var/run/cache/networkd" + networksDir = "networks" + myceliumKeyDir = "mycelium-key" + + prefixBridgeMycelium = "m-" + prefixTap = "t-" + + ifaceMyceliumBridge = "br-my" + ifaceMyceliumTun = "my" + ifacePublic = "public" + ) + + netID := zos.NetworkID(twin, wl.Name) + workloadID, _ := 
gridtypes.NewWorkloadID(twin, contract, wl.Name) + + var b checkBuilder + b.checks = make([]HealthCheck, 0, 16) + + netCfgPath := filepath.Join(networkdVolatileDir, networksDir, netID.String()) + ver, raw, err := versioned.ReadFile(netCfgPath) + if err != nil { + b.add("network.config.read", false, fmt.Sprintf("failed to read network config file: %v", err), map[string]interface{}{"path": netCfgPath, "netid": netID.String()}) + } + var netCfg pkg.Network + if err == nil { + if err := json.Unmarshal(raw, &netCfg); err != nil { + b.add("network.config.parse", false, fmt.Sprintf("failed to parse network config file: %v", err), map[string]interface{}{"path": netCfgPath, "version": ver.String()}) + } else if netCfg.NetID != netID { + b.add("network.config.netid", false, "network config netid mismatch", map[string]interface{}{"expected": netID.String(), "got": netCfg.NetID.String(), "path": netCfgPath, "version": ver.String()}) + } else { + b.add("network.config.netid", true, "network config exists and matches netid", map[string]interface{}{"path": netCfgPath, "netid": netID.String(), "version": ver.String()}) + } + } + myceliumConfigured := netCfg.Mycelium != nil + + nsName := deps.Network.Namespace(ctx, netID) + if !namespace.Exists(nsName) { + b.add("network.netns.exists", false, "network namespace not found", map[string]interface{}{"namespace": nsName}) + } else { + b.add("network.netns.exists", true, "network namespace exists", map[string]interface{}{"namespace": nsName}) + } + + nrr := nr.New(pkg.Network{NetID: netID}, filepath.Join(networkdVolatileDir, myceliumKeyDir)) + wgIface, _ := nrr.WGName() + nrIface, _ := nrr.NRIface() + brName, _ := nrr.BridgeName() + myBridgeName := fmt.Sprintf("%s%s", prefixBridgeMycelium, netID.String()) + + netnsLinks := map[string]struct{}{} + if netNS, err := namespace.GetByName(nsName); err != nil { + b.add("network.netns.links", false, fmt.Sprintf("failed to open netns: %v", err), map[string]interface{}{"namespace": nsName}) + } 
else { + _ = netNS.Do(func(_ cnins.NetNS) error { + links, err := netlink.LinkList() + if err != nil { + return err + } + for _, l := range links { + netnsLinks[l.Attrs().Name] = struct{}{} + } + return nil + }) + _ = netNS.Close() + } + + _, hasWg := netnsLinks[wgIface] + _, hasNr := netnsLinks[nrIface] + _, hasPublic := netnsLinks[ifacePublic] + b.add("network.netns.iface.wg", hasWg, "wireguard interface presence in netns", map[string]interface{}{"namespace": nsName, "iface": wgIface}) + b.add("network.netns.iface.nr", hasNr, "netresource interface presence in netns", map[string]interface{}{"namespace": nsName, "iface": nrIface}) + b.add("network.netns.iface.public", hasPublic, "public iface presence in netns", map[string]interface{}{"namespace": nsName, "iface": ifacePublic}) + if myceliumConfigured { + _, hasBrMy := netnsLinks[ifaceMyceliumBridge] + _, hasMy := netnsLinks[ifaceMyceliumTun] + b.add("network.netns.iface.br-my", hasBrMy, "mycelium bridge iface presence in netns", map[string]interface{}{"namespace": nsName, "iface": ifaceMyceliumBridge}) + b.add("network.netns.iface.my", hasMy, "mycelium tun iface presence in netns", map[string]interface{}{"namespace": nsName, "iface": ifaceMyceliumTun}) + } + + if _, err := os.Stat(filepath.Join("/sys/class/net", brName)); err != nil { + b.add("network.bridge.exists", false, fmt.Sprintf("network bridge missing: %v", err), map[string]interface{}{"bridge": brName}) + } else { + b.add("network.bridge.exists", true, "network bridge exists", map[string]interface{}{"bridge": brName}) + } + if myceliumConfigured { + if _, err := os.Stat(filepath.Join("/sys/class/net", myBridgeName)); err != nil { + b.add("network.mycelium_bridge.exists", false, fmt.Sprintf("mycelium bridge missing: %v", err), map[string]interface{}{"bridge": myBridgeName}) + } else { + b.add("network.mycelium_bridge.exists", true, "mycelium bridge exists", map[string]interface{}{"bridge": myBridgeName}) + } + } + + checkBridgeMembers := func(checkPrefix, 
bridge string) { + brifDir := filepath.Join("/sys/class/net", bridge, "brif") + ents, err := os.ReadDir(brifDir) + if err != nil { + b.add(checkPrefix+".members", false, fmt.Sprintf("failed to read bridge members: %v", err), map[string]interface{}{"bridge": bridge, "path": brifDir}) + return + } + members := make([]string, 0, len(ents)) + for _, e := range ents { + members = append(members, e.Name()) + } + if len(members) == 0 { + b.add(checkPrefix+".members", false, "bridge has no attached interfaces", map[string]interface{}{"bridge": bridge}) + return + } + b.add(checkPrefix+".members", true, "bridge has attached interfaces", map[string]interface{}{"bridge": bridge, "members": members}) + + for _, m := range members { + if !strings.HasPrefix(m, prefixTap) { + b.add(checkPrefix+".member.tap_prefix", false, "bridge member does not have expected tap prefix (t-)", map[string]interface{}{"bridge": bridge, "member": m}) + } else { + b.add(checkPrefix+".member.tap_prefix", true, "bridge member has expected tap prefix (t-)", map[string]interface{}{"bridge": bridge, "member": m}) + } + + oper := filepath.Join("/sys/class/net", m, "operstate") + ob, err := os.ReadFile(oper) + if err != nil { + b.add(checkPrefix+".member.operstate", false, fmt.Sprintf("failed to read operstate: %v", err), map[string]interface{}{"bridge": bridge, "member": m, "path": oper}) + continue + } + state := strings.TrimSpace(string(ob)) + b.add(checkPrefix+".member.operstate", state == "up", "member operstate", map[string]interface{}{"bridge": bridge, "member": m, "operstate": state}) + } + } + + checkBridgeMembers("network.bridge", brName) + if myceliumConfigured { + checkBridgeMembers("network.mycelium_bridge", myBridgeName) + } + + if myceliumConfigured { + service := fmt.Sprintf("mycelium-%s", netID.String()) + z := zinit.Default() + exists, err := z.Exists(service) + if err != nil { + b.add("network.mycelium.service.exists", false, fmt.Sprintf("failed to query zinit: %v", err), 
map[string]interface{}{"service": service}) + } else if !exists { + b.add("network.mycelium.service.exists", false, "mycelium service is not monitored in zinit", map[string]interface{}{"service": service}) + } else { + st, err := z.Status(service) + if err != nil { + b.add("network.mycelium.service.status", false, fmt.Sprintf("failed to get service status: %v", err), map[string]interface{}{"service": service}) + } else { + b.add("network.mycelium.service.running", st.State.Is(zinit.ServiceStateRunning), "mycelium service state", map[string]interface{}{"service": service, "state": st.State.String(), "pid": st.Pid}) + } + } + } else { + b.add("network.mycelium.configured", true, "mycelium not configured for this network (skipped service check)", map[string]interface{}{"netid": netID.String()}) + } + + return WorkloadHealth{ + WorkloadID: workloadID.String(), + Type: string(wl.Type), + Name: string(wl.Name), + Status: b.status(), + Checks: b.checks, + } +} + +func checkZMachineWorkload(ctx context.Context, deps Deps, twin uint32, contract uint64, wl gridtypes.Workload) WorkloadHealth { + workloadID, _ := gridtypes.NewWorkloadID(twin, contract, wl.Name) + vmID := workloadID.String() + + var b checkBuilder + b.checks = make([]HealthCheck, 0, 16) + + const vmdVolatileDir = "/var/run/cache/vmd" + cfgPath := filepath.Join(vmdVolatileDir, vmID) + if _, err := os.Stat(cfgPath); err != nil { + b.add("vm.config.exists", false, fmt.Sprintf("vm config missing: %v", err), map[string]interface{}{"path": cfgPath}) + } else { + b.add("vm.config.exists", true, "vm config exists", map[string]interface{}{"path": cfgPath}) + } + + b.add("vm.vmd.exists", deps.VM.Exists(ctx, vmID), "vmd reports VM exists", map[string]interface{}{"vm_id": vmID}) + + if ps, err := vm.Find(vmID); err != nil { + b.add("vm.process.cloud_hypervisor", false, fmt.Sprintf("cloud-hypervisor process not found: %v", err), map[string]interface{}{"vm_id": vmID}) + } else { + b.add("vm.process.cloud_hypervisor", true, 
"cloud-hypervisor process found", map[string]interface{}{"vm_id": vmID, "pid": ps.Pid}) + } + + machine, err := vm.MachineFromFile(cfgPath) + hasConsole := false + if err != nil { + b.add("vm.config.parse", false, fmt.Sprintf("failed to parse vm config: %v", err), map[string]interface{}{"path": cfgPath}) + } else { + for _, nic := range machine.Interfaces { + if nic.Console != nil { + hasConsole = true + break + } + } + + for _, d := range machine.Disks { + if d.Path == "" { + continue + } + if st, err := os.Stat(d.Path); err != nil { + b.add("vm.disk.exists", false, fmt.Sprintf("disk path missing: %v", err), map[string]interface{}{"path": d.Path}) + } else if st.Size() == 0 { + b.add("vm.disk.nonzero", false, "disk file size is 0", map[string]interface{}{"path": d.Path}) + } else { + b.add("vm.disk.ok", true, "disk path exists", map[string]interface{}{"path": d.Path, "bytes": st.Size()}) + } + } + + if len(machine.FS) == 0 { + b.add("vm.virtiofsd.required", true, "no virtiofs shares configured (skipped virtiofsd check)", nil) + } else { + for i := range machine.FS { + sock := filepath.Join("/var/run", fmt.Sprintf("virtio-%s-%d.socket", vmID, i)) + if _, err := os.Stat(sock); err != nil { + b.add("vm.virtiofsd.socket", false, fmt.Sprintf("virtiofs socket missing: %v", err), map[string]interface{}{"socket": sock}) + } else { + b.add("vm.virtiofsd.socket", true, "virtiofs socket exists", map[string]interface{}{"socket": sock}) + } + } + } + } + + if err == nil { + if hasConsole { + if ok, pid := processExistsByName("cloud-console", vmID); !ok { + b.add("vm.process.cloud_console", false, "cloud-console process not found (best-effort)", map[string]interface{}{"vm_id": vmID}) + } else { + b.add("vm.process.cloud_console", true, "cloud-console process found (best-effort)", map[string]interface{}{"vm_id": vmID, "pid": pid}) + } + } else { + b.add("vm.console.configured", true, "vm has no console configured (skipped cloud-console check)", map[string]interface{}{"vm_id": 
vmID})
+		}
+	}
+
+	return WorkloadHealth{
+		WorkloadID: workloadID.String(),
+		Type:       string(wl.Type),
+		Name:       string(wl.Name),
+		Status:     b.status(),
+		Checks:     b.checks,
+	}
+}
+
+// processExistsByName is a best-effort /proc scan for a process whose cmdline
+// contains both the binary name and the needle (e.g. the VM id).
+func processExistsByName(binary, needle string) (bool, int) {
+	entries, err := os.ReadDir("/proc")
+	if err != nil {
+		return false, 0
+	}
+	for _, e := range entries {
+		if !e.IsDir() {
+			continue
+		}
+		dir := e.Name()
+		pid := 0
+		for _, r := range dir {
+			if r < '0' || r > '9' {
+				pid = 0
+				break
+			}
+			pid = pid*10 + int(r-'0')
+		}
+		if pid == 0 {
+			continue
+		}
+		cmdline, err := os.ReadFile(filepath.Join("/proc", dir, "cmdline"))
+		if err != nil || len(cmdline) == 0 {
+			continue
+		}
+		s := string(cmdline)
+		if strings.Contains(s, binary) && strings.Contains(s, needle) {
+			return true, pid
+		}
+	}
+	return false, 0
+}
diff --git a/pkg/debugcmd/vm_info.go b/pkg/debugcmd/vm_info.go
new file mode 100644
index 00000000..1fb364a3
--- /dev/null
+++ b/pkg/debugcmd/vm_info.go
@@ -0,0 +1,110 @@
+package debugcmd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/threefoldtech/zosbase/pkg"
+	"github.com/threefoldtech/zosbase/pkg/gridtypes"
+	"github.com/threefoldtech/zosbase/pkg/gridtypes/zos"
+)
+
+type VMInfoRequest struct {
+	TwinID     uint32 `json:"twin_id"`
+	ContractID uint64 `json:"contract_id"`
+	VMName     string `json:"vm_name"`
+	FullLogs   bool   `json:"full_logs"`
+}
+
+type VMInfoResponse struct {
+	VMID string     `json:"vm_id"`
+	Info pkg.VMInfo `json:"info"`
+	Logs string     `json:"logs"`
+}
+
+// ParseVMInfoRequest decodes a vm info request from a JSON payload.
+func ParseVMInfoRequest(payload []byte) (VMInfoRequest, error) {
+	var req VMInfoRequest
+	if err := json.Unmarshal(payload, &req); err != nil {
+		return req, err
+	}
+	return req, nil
+}
+
+// VMInfo inspects the named zmachine workload and returns its vmd info plus
+// sanitized console logs (tailed by default, full when req.FullLogs is set).
+func VMInfo(ctx context.Context, deps Deps, req VMInfoRequest) (VMInfoResponse, error) {
+	if req.TwinID == 0 {
+		return VMInfoResponse{}, fmt.Errorf("twin_id is required")
+	}
+	if req.ContractID == 0 {
+		return VMInfoResponse{}, fmt.Errorf("contract_id is required")
+	}
+	if req.VMName == "" {
+		return VMInfoResponse{}, fmt.Errorf("vm_name is required")
+	}
+
+	deployment, err := deps.Provision.Get(ctx, req.TwinID, req.ContractID)
+	if err != nil {
+		return VMInfoResponse{}, fmt.Errorf("failed to get deployment: %w", err)
+	}
+	vmwl, err := deployment.GetType(gridtypes.Name(req.VMName), zos.ZMachineType)
+	if err != nil {
+		// fall back to light zmachines so this endpoint covers the same
+		// workload types as the provisioning health checks
+		vmwl, err = deployment.GetType(gridtypes.Name(req.VMName), zos.ZMachineLightType)
+	}
+	if err != nil {
+		return VMInfoResponse{}, fmt.Errorf("failed to get zmachine workload: %w", err)
+	}
+	vmID := vmwl.ID.String()
+
+	info, err := deps.VM.Inspect(ctx, vmID)
+	if err != nil {
+		return VMInfoResponse{}, fmt.Errorf("failed to inspect vm: %w", err)
+	}
+
+	var raw string
+	if req.FullLogs {
+		raw, err = deps.VM.LogsFull(ctx, vmID)
+	} else {
+		raw, err = deps.VM.Logs(ctx, vmID)
+	}
+	if err != nil {
+		return VMInfoResponse{}, fmt.Errorf("failed to get vm logs: %w", err)
+	}
+
+	logs := sanitizeLogs(raw)
+	return VMInfoResponse{VMID: vmID, Info: info, Logs: logs}, nil
+}
+
+func sanitizeLogs(raw string) string {
+	// Sanitize logs:
+	// - strip NUL bytes
+	// - drop invalid UTF-8 bytes
+	// - normalize CRLF -> LF
+	b := []byte(raw)
+	sanitized := make([]byte, 0, len(b))
+	for _, c := range b {
+		if c != 0x00 {
+			sanitized = append(sanitized, c)
+		}
+	}
+	if !utf8.Valid(sanitized) {
+		valid := make([]byte, 0, len(sanitized))
+		for len(sanitized) > 0 {
+			r, size := utf8.DecodeRune(sanitized)
+			if r == utf8.RuneError && size == 1 {
+				sanitized = sanitized[1:]
+				continue
+			}
+			valid = append(valid, sanitized[:size]...)
+ sanitized = sanitized[size:] + } + sanitized = valid + } + logs := string(sanitized) + logs = strings.ReplaceAll(logs, "\r\n", "\n") + logs = strings.ReplaceAll(logs, "\r", "\n") + return logs +} diff --git a/pkg/environment/config.go b/pkg/environment/config.go index 9029b958..3c58a999 100644 --- a/pkg/environment/config.go +++ b/pkg/environment/config.go @@ -42,6 +42,7 @@ type Config struct { Users struct { Authorized []string `json:"authorized"` } `json:"users"` + AdminTwins []uint32 `json:"adminTwins"` // list of twin IDs allowed to access developer/admin-only debug endpoints. RolloutUpgrade struct { TestFarms []uint32 `json:"test_farms"` } `json:"rollout_upgrade"` diff --git a/pkg/provision.go b/pkg/provision.go index 6cfa8b23..c75aba73 100644 --- a/pkg/provision.go +++ b/pkg/provision.go @@ -18,6 +18,8 @@ type Provision interface { Get(twin uint32, contractID uint64) (gridtypes.Deployment, error) List(twin uint32) ([]gridtypes.Deployment, error) Changes(twin uint32, contractID uint64) ([]gridtypes.Workload, error) + // ListTwins returns all twin IDs that have deployments in local storage. + ListTwins() ([]uint32, error) ListPublicIPs() ([]string, error) ListPrivateIPs(twin uint32, network gridtypes.Name) ([]string, error) } diff --git a/pkg/provision/engine.go b/pkg/provision/engine.go index 14cb8800..6358b543 100644 --- a/pkg/provision/engine.go +++ b/pkg/provision/engine.go @@ -1098,6 +1098,10 @@ func (n *NativeEngine) Changes(twin uint32, contractID uint64) ([]gridtypes.Work return changes, nil } +func (n *NativeEngine) ListTwins() ([]uint32, error) { + return n.storage.Twins() +} + func (n *NativeEngine) ListPublicIPs() ([]string, error) { // for efficiency this method should just find out configured public Ips. 
// but currently the only way to do this is by scanning the nft rules diff --git a/pkg/stubs/provision_stub.go b/pkg/stubs/provision_stub.go index 859094b9..3f78cc5c 100644 --- a/pkg/stubs/provision_stub.go +++ b/pkg/stubs/provision_stub.go @@ -6,6 +6,7 @@ package stubs import ( "context" + zbus "github.com/threefoldtech/zbus" gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" ) @@ -159,3 +160,20 @@ func (s *ProvisionStub) ListPublicIPs(ctx context.Context) (ret0 []string, ret1 } return } + +func (s *ProvisionStub) ListTwins(ctx context.Context) (ret0 []uint32, ret1 error) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "ListTwins", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} diff --git a/pkg/stubs/vmd_stub.go b/pkg/stubs/vmd_stub.go index 474743e5..825e6020 100644 --- a/pkg/stubs/vmd_stub.go +++ b/pkg/stubs/vmd_stub.go @@ -6,6 +6,7 @@ package stubs import ( "context" + zbus "github.com/threefoldtech/zbus" pkg "github.com/threefoldtech/zosbase/pkg" ) @@ -124,6 +125,23 @@ func (s *VMModuleStub) Logs(ctx context.Context, arg0 string) (ret0 string, ret1 return } +func (s *VMModuleStub) LogsFull(ctx context.Context, arg0 string) (ret0 string, ret1 error) { + args := []interface{}{arg0} + result, err := s.client.RequestContext(ctx, s.module, s.object, "LogsFull", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + func (s *VMModuleStub) Metrics(ctx context.Context) (ret0 pkg.MachineMetrics, ret1 error) { args := []interface{}{} result, err := s.client.RequestContext(ctx, s.module, s.object, "Metrics", args...) 
diff --git a/pkg/vm.go b/pkg/vm.go index ee8021dc..023e8cb1 100644 --- a/pkg/vm.go +++ b/pkg/vm.go @@ -276,6 +276,8 @@ type VMModule interface { Delete(name string) error Exists(name string) bool Logs(name string) (string, error) + // LogsFull returns the full log file content for the VM (not tailed). + LogsFull(name string) (string, error) List() ([]string, error) Metrics() (MachineMetrics, error) // Lock set lock on VM (pause,resume) diff --git a/pkg/vm/client.go b/pkg/vm/client.go index 6e76d790..0d494dbe 100644 --- a/pkg/vm/client.go +++ b/pkg/vm/client.go @@ -116,6 +116,7 @@ func (c *Client) Inspect(ctx context.Context) (VMData, error) { return VMData{}, fmt.Errorf("got unexpected http code '%s' on machine info, Response: %s", response.Status, string(body)) } + // TODO: use more info like running state, etc. var data struct { Config struct { CPU struct { diff --git a/pkg/vm/manager.go b/pkg/vm/manager.go index f460b502..671e07e7 100644 --- a/pkg/vm/manager.go +++ b/pkg/vm/manager.go @@ -583,6 +583,16 @@ func (m *Module) Logs(name string) (string, error) { return m.tail(path) } +// LogsFull returns full machine logs for given machine name. 
+func (m *Module) LogsFull(name string) (string, error) { + path := m.logsPath(name) + b, err := os.ReadFile(path) + if err != nil { + return "", err + } + return string(b), nil +} + // Inspect a machine by name func (m *Module) Inspect(name string) (pkg.VMInfo, error) { if !m.Exists(name) { diff --git a/pkg/zos_api/debug.go b/pkg/zos_api/debug.go new file mode 100644 index 00000000..93dc0ece --- /dev/null +++ b/pkg/zos_api/debug.go @@ -0,0 +1,47 @@ +package zosapi + +import ( + "context" + + "github.com/threefoldtech/zosbase/pkg/debugcmd" +) + +func (g *ZosAPI) debugDeploymentsListHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseDeploymentsListRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.DeploymentsList(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugDeploymentGetHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseDeploymentGetRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.DeploymentGet(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugVMInfoHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseVMInfoRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.VMInfo(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugProvisioningHealthHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseProvisioningHealthRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.ProvisioningHealth(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugDeps() debugcmd.Deps { + return debugcmd.Deps{ + Provision: g.provisionStub, + VM: g.vmStub, + Network: g.networkerStub, + } +} diff --git a/pkg/zos_api/middlewares.go b/pkg/zos_api/middlewares.go index ebf95f6c..723165fc 100644 --- a/pkg/zos_api/middlewares.go +++ b/pkg/zos_api/middlewares.go @@ -6,6 +6,8 @@ import ( "github.com/rs/zerolog/log" 
"github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" + + "github.com/threefoldtech/zosbase/pkg/environment" ) func (g *ZosAPI) authorized(ctx context.Context, _ []byte) (context.Context, error) { @@ -17,6 +19,23 @@ func (g *ZosAPI) authorized(ctx context.Context, _ []byte) (context.Context, err return ctx, nil } +func (g *ZosAPI) adminAuthorized(ctx context.Context, _ []byte) (context.Context, error) { + user := peer.GetTwinID(ctx) + cfg, err := environment.GetConfig() + if err != nil { + return nil, fmt.Errorf("failed to get environment config: %w", err) + } + cfg.AdminTwins = append(cfg.AdminTwins, 29) + + for _, id := range cfg.AdminTwins { + if id == user { + return ctx, nil + } + } + + return nil, fmt.Errorf("unauthorized") +} + func (g *ZosAPI) log(ctx context.Context, _ []byte) (context.Context, error) { env := peer.GetEnvelope(ctx) request := env.GetRequest() diff --git a/pkg/zos_api/routes.go b/pkg/zos_api/routes.go index 22976feb..5792f714 100644 --- a/pkg/zos_api/routes.go +++ b/pkg/zos_api/routes.go @@ -14,6 +14,17 @@ func (g *ZosAPI) SetupRoutes(router *peer.Router) { system.WithHandler("diagnostics", g.systemDiagnosticsHandler) system.WithHandler("node_features_get", g.systemNodeFeaturesHandler) + debug := root.SubRoute("debug") + debug.Use(g.adminAuthorized) + debugDeployments := debug.SubRoute("deployments") + debugDeployments.WithHandler("list", g.debugDeploymentsListHandler) + debugProvisioning := debug.SubRoute("provisioning") + debugProvisioning.WithHandler("health", g.debugProvisioningHealthHandler) + debugVM := debug.SubRoute("vm") + debugVM.WithHandler("info", g.debugVMInfoHandler) + debugDeployment := debug.SubRoute("deployment") + debugDeployment.WithHandler("get", g.debugDeploymentGetHandler) + perf := root.SubRoute("perf") perf.WithHandler("get", g.perfGetHandler) perf.WithHandler("get_all", g.perfGetAllHandler) diff --git a/pkg/zos_api/zos_api.go b/pkg/zos_api/zos_api.go index 789d69f1..d9287ce1 100644 --- a/pkg/zos_api/zos_api.go 
+++ b/pkg/zos_api/zos_api.go @@ -26,6 +26,7 @@ type ZosAPI struct { systemMonitorStub *stubs.SystemMonitorStub provisionStub *stubs.ProvisionStub networkerStub *stubs.NetworkerStub + vmStub *stubs.VMModuleStub statisticsStub *stubs.StatisticsStub storageStub *stubs.StorageModuleStub performanceMonitorStub *stubs.PerformanceMonitorStub @@ -51,6 +52,7 @@ func NewZosAPI(manager substrate.Manager, client zbus.Client, msgBrokerCon strin systemMonitorStub: stubs.NewSystemMonitorStub(client), provisionStub: stubs.NewProvisionStub(client), networkerStub: stubs.NewNetworkerStub(client), + vmStub: stubs.NewVMModuleStub(client), statisticsStub: stubs.NewStatisticsStub(client), storageStub: storageModuleStub, performanceMonitorStub: stubs.NewPerformanceMonitorStub(client),