7 changes: 5 additions & 2 deletions pkg/container/container.go
@@ -142,7 +142,10 @@ func (c *Module) startup() error {

// Run creates and starts a container
func (c *Module) Run(ns string, data pkg.Container) (id pkg.ContainerID, err error) {
log.Debug().Str("ns", ns).Msg("starting container")
log.Info().
Str("namespace", ns).
Str("data", fmt.Sprintf("%+v", data)).
Contributor:
data includes envvars, i don't think we should expose that
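One possible follow-up, sketched here only as an illustration (not part of this PR): log a redacted view of the container data so env var values never reach the logs. zerolog is assumed as the logger, and the pkg.Container field names used in the usage note (Name, FList, Env) are assumptions to be checked against the real struct.

// sketch: keep env var names, drop the values, before logging container data
func redactEnv(env []string) []string {
	keys := make([]string, 0, len(env))
	for _, kv := range env {
		if i := strings.IndexByte(kv, '='); i >= 0 {
			kv = kv[:i] // "KEY=value" -> "KEY"
		}
		keys = append(keys, kv)
	}
	return keys
}

// usage in Run (needs "strings" in the import block), assuming those fields exist:
// log.Info().
//     Str("namespace", ns).
//     Str("name", data.Name).
//     Str("flist", data.FList).
//     Strs("env-keys", redactEnv(data.Env)).
//     Msg("starting container")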

Msgf("starting container")

// create a new client connected to the default socket path for containerd
client, err := containerd.New(c.containerd)
@@ -570,7 +573,7 @@ func (c *Module) SignalDelete(ns string, id pkg.ContainerID) error {

// Delete stops and remove a container
func (c *Module) Delete(ns string, id pkg.ContainerID) error {
log.Debug().Str("id", string(id)).Str("ns", ns).Msg("delete container")
log.Info().Str("namespace", ns).Str("container-id", string(id)).Msg("deleting container")

client, err := containerd.New(c.containerd)
if err != nil {
4 changes: 2 additions & 2 deletions pkg/flist/cleanup.go
@@ -111,11 +111,11 @@ func (f *flistModule) cleanUnusedMounts() error {
delete(roTargets, info.Pid)
}
if len(roTargets) == 0 {
log.Debug().Msg("no unused mounts detected")
log.Info().Msg("no unused mounts detected")
}
// cleaning up remaining un-used mounts
for pid, mount := range roTargets {
log.Debug().Int64("source", pid).Msgf("cleaning up mount: %+v", mount)
log.Info().Int64("source", pid).Msgf("cleaning up mount: %+v", mount)
if err := f.system.Unmount(mount.Target, 0); err != nil {
log.Error().Err(err).Str("target", mount.Target).Msg("failed to clean up mount")
continue
3 changes: 3 additions & 0 deletions pkg/network/networker.go
@@ -1068,6 +1068,7 @@ func (n *networker) DeleteNR(wl gridtypes.WorkloadID) error {
return err
}

log.Info().Str("network-id", string(netID)).Msg("deleting network resource")
nr := nr.New(netNR, n.myceliumKeyDir)

if err := nr.Delete(); err != nil {
@@ -1116,6 +1117,8 @@ func (n *networker) SetPublicConfig(cfg pkg.PublicConfig) error {
return nil
}

log.Info().Str("ipv4", cfg.IPv4.String()).Str("ipv6", cfg.IPv6.String()).Str("gateway", cfg.GW4.String()).Msg("configuring public network")

id := n.identity.NodeID(context.Background())
_, err = public.EnsurePublicSetup(id, environment.MustGet().PubVlan, &cfg)
if err != nil {
3 changes: 3 additions & 0 deletions pkg/network/nr/net_resource.go
@@ -203,6 +203,7 @@ func (nr *NetResource) WGName() (string, error) {
// Create setup the basic components of the network resource
// network namespace, bridge, wireguard interface and veth pair
func (nr *NetResource) Create() error {
log.Info().Str("network-id", nr.ID()).Str("subnet", nr.resource.Subnet.String()).Msg("creating network resource")
log.Debug().Str("nr", nr.String()).Msg("create network resource")

if err := nr.ensureNRBridge(); err != nil {
@@ -604,6 +605,8 @@ func (nr *NetResource) ConfigureWG(privateKey string) error {

// Delete removes all the interfaces and namespaces created by the Create method
func (nr *NetResource) Delete() error {
log.Info().Str("network-id", nr.ID()).Str("subnet", nr.resource.Subnet.String()).Msg("deleting network resource")

netnsName, err := nr.Namespace()
if err != nil {
return err
7 changes: 7 additions & 0 deletions pkg/primitives/zdb/zdb.go
@@ -484,6 +484,13 @@ func (p *Manager) createZdbContainerLight(ctx context.Context, device pkg.Device
}

func (p *Manager) zdbRun(ctx context.Context, name string, rootfs string, cmd string, netns string, volumepath string, socketdir string) error {
log.Info().
Str("zdb-name", name).
Str("network-ns", netns).
Str("volume", volumepath).
Str("rootfs", rootfs).
Msg("starting ZDB container")

cont := stubs.NewContainerModuleStub(p.zbus)

// we do data migration here because this is called
7 changes: 7 additions & 0 deletions pkg/provision/engine.go
@@ -347,6 +347,11 @@ func (e *NativeEngine) Admins() Twins {

// Provision workload
func (e *NativeEngine) Provision(ctx context.Context, deployment gridtypes.Deployment) error {
log.Info().
Uint32("twin", deployment.TwinID).
Uint64("contract", deployment.ContractID).
Msg("scheduling deployment for provision")

if deployment.Version != 0 {
return errors.Wrap(ErrInvalidVersion, "expected version to be 0 on deployment creation")
}
@@ -987,6 +992,8 @@ func (e *NativeEngine) updateDeployment(ctx context.Context, ops []gridtypes.Upg

// DecommissionCached implements the zbus interface
func (e *NativeEngine) DecommissionCached(id string, reason string) error {
log.Info().Str("workload-id", id).Str("reason", reason).Msg("decommissioning cached workload")

globalID := gridtypes.WorkloadID(id)
twin, dlID, name, err := globalID.Parts()
if err != nil {
11 changes: 11 additions & 0 deletions pkg/provision/provisiner.go
@@ -6,6 +6,7 @@ import (
"fmt"

"github.com/pkg/errors"
"github.com/rs/zerolog/log"
"github.com/threefoldtech/zosbase/pkg/gridtypes"
)

@@ -120,6 +121,8 @@ func (p *mapProvisioner) Initialize(ctx context.Context) error {

// Provision implements provision.Provisioner
func (p *mapProvisioner) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (result gridtypes.Result, err error) {
log.Info().Str("workload-id", string(wl.ID)).Str("workload-type", string(wl.Type)).Msg("provisioning workload")

manager, ok := p.managers[wl.Type]
if !ok {
return result, fmt.Errorf("unknown workload type '%s' for reservation id '%s'", wl.Type, wl.ID)
@@ -135,6 +138,8 @@ func (p *mapProvisioner) Provision(ctx context.Context, wl *gridtypes.WorkloadWi

// Decommission implementation for provision.Provisioner
func (p *mapProvisioner) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error {
log.Info().Str("workload-id", string(wl.ID)).Str("workload-type", string(wl.Type)).Msg("deprovisioning workload")

manager, ok := p.managers[wl.Type]
if !ok {
return fmt.Errorf("unknown workload type '%s' for reservation id '%s'", wl.Type, wl.ID)
@@ -145,6 +150,8 @@ func (p *mapProvisioner) Deprovision(ctx context.Context, wl *gridtypes.Workload

// Pause a workload
func (p *mapProvisioner) Pause(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) {
log.Info().Str("workload-id", string(wl.ID)).Str("workload-type", string(wl.Type)).Msg("pausing workload")

if wl.Result.State != gridtypes.StateOk {
return wl.Result, fmt.Errorf("can only pause workloads in ok state")
}
@@ -172,6 +179,8 @@ func (p *mapProvisioner) Pause(ctx context.Context, wl *gridtypes.WorkloadWithID

// Resume a workload
func (p *mapProvisioner) Resume(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) {
log.Info().Str("workload-id", string(wl.ID)).Str("workload-type", string(wl.Type)).Msg("resuming workload")

if wl.Result.State != gridtypes.StatePaused {
return wl.Result, fmt.Errorf("can only resume workloads in paused state")
}
@@ -198,6 +207,8 @@ func (p *mapProvisioner) Resume(ctx context.Context, wl *gridtypes.WorkloadWithI

// Provision implements provision.Provisioner
func (p *mapProvisioner) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (result gridtypes.Result, err error) {
log.Info().Str("workload-id", string(wl.ID)).Str("workload-type", string(wl.Type)).Msg("updating workload")

manager, ok := p.managers[wl.Type]
if !ok {
return result, fmt.Errorf("unknown workload type '%s' for reservation id '%s'", wl.Type, wl.ID)
4 changes: 3 additions & 1 deletion pkg/storage_light/storage.go
@@ -193,7 +193,7 @@ func (s *Module) poolType(pool filesystem.Pool, vm bool) (zos.DeviceType, error)
}

func (s *Module) mountPool(device filesystem.DeviceInfo, vm bool) {
log.Debug().Any("device", device).Msg("mounting device")
log.Info().Str("device", device.Path).Str("label", device.Label).Msg("mounting storage device")

if device.IsPXEPartition() {
log.Debug().Str("device", device.Path).Msg("skip device has 'ZOSPXE' label")
@@ -231,9 +231,11 @@ func (s *Module) mountPool(device filesystem.DeviceInfo, vm bool) {
case zos.SSDDevice:
s.totalSSD += usage.Size
s.ssds = append(s.ssds, pool)
log.Info().Str("device", device.Path).Str("type", "SSD").Str("pool", pool.Name()).Uint64("size", usage.Size).Msg("mounted storage pool")
case zos.HDDDevice:
s.totalHDD += usage.Size
s.hdds = append(s.hdds, pool)
log.Info().Str("device", device.Path).Str("type", "HDD").Str("pool", pool.Name()).Uint64("size", usage.Size).Msg("mounted storage pool")
default:
log.Error().Str("type", string(typ)).Str("device", device.Path).Msg("unknown device type")
}
50 changes: 47 additions & 3 deletions pkg/vm/ch.go
@@ -50,7 +50,7 @@ func (m *Machine) startCloudConsole(ctx context.Context, namespace string, netwo
logs,
}

log.Debug().Msgf("running cloud-console : %+v", args)
log.Info().Str("vm-id", m.ID).Str("namespace", namespace).Str("console-ip", networkAddr.IP.String()).Msgf("starting cloud-console on port %d", port)

cmd := exec.CommandContext(ctx, "busybox", args...)
if err := cmd.Start(); err != nil {
@@ -100,6 +100,7 @@ func (m *Machine) startCloudConsoleLight(ctx context.Context, namespace string,
fmt.Sprint(port),
logs,
}
log.Info().Str("vm-id", m.ID).Str("namespace", namespace).Str("mycelium-ip", mycIp).Msgf("starting cloud-console on port %d", port)
log.Debug().Msgf("running cloud-console : %+v", args)

cmd := exec.CommandContext(ctx, "busybox", args...)
@@ -228,7 +229,8 @@ func (m *Machine) Run(ctx context.Context, socket, logs string) (pkg.MachineInfo
// this.
fullArgs = append(fullArgs, "setsid", chBin)
fullArgs = append(fullArgs, argsList...)
log.Debug().Msgf("ch: %+v", fullArgs)
log.Info().Str("vm-id", m.ID).Uint8("cpu", uint8(m.Config.CPU)).Str("memory", m.Config.Mem.String()).Msg("starting cloud-hypervisor VM")
log.Info().Msgf("ch: %+v", fullArgs)

cmd := exec.CommandContext(ctx, "busybox", fullArgs...)
cmd.Stdout = logFd
@@ -280,6 +282,48 @@ func (m *Machine) Run(ctx context.Context, socket, logs string) (pkg.MachineInfo
if err != nil {
return pkg.MachineInfo{}, errors.Wrapf(err, "failed to Inspect vm with id: '%s'", m.ID)
}

// Log VM network interfaces with full address information
logEvent := log.Info().Str("vm-id", m.ID)
for idx, ifc := range m.Interfaces {
logEvent = logEvent.
Str(fmt.Sprintf("interface-%d-id", idx), ifc.ID).
Str(fmt.Sprintf("interface-%d-tap", idx), ifc.Tap).
Str(fmt.Sprintf("interface-%d-mac", idx), ifc.Mac)

// Log IP addresses from NetworkInfo if available
if m.NetworkInfo != nil && idx < len(m.NetworkInfo.Ifaces) {
vmIface := m.NetworkInfo.Ifaces[idx]

// Log all IP addresses
for ipIdx, ip := range vmIface.IPs {
logEvent = logEvent.Str(fmt.Sprintf("interface-%d-ip-%d", idx, ipIdx), ip.String())
}

// Log gateways
if vmIface.IP4DefaultGateway != nil {
logEvent = logEvent.Str(fmt.Sprintf("interface-%d-ipv4-gateway", idx), vmIface.IP4DefaultGateway.String())
}
if vmIface.IP6DefaultGateway != nil {
logEvent = logEvent.Str(fmt.Sprintf("interface-%d-ipv6-gateway", idx), vmIface.IP6DefaultGateway.String())
}

// Log public IP flags
if vmIface.PublicIPv4 {
logEvent = logEvent.Bool(fmt.Sprintf("interface-%d-public-ipv4", idx), true)
}
if vmIface.PublicIPv6 {
logEvent = logEvent.Bool(fmt.Sprintf("interface-%d-public-ipv6", idx), true)
}

// Log network ID if present
if vmIface.NetID != "" {
logEvent = logEvent.Str(fmt.Sprintf("interface-%d-network-id", idx), string(vmIface.NetID))
}
}
}
logEvent.Msg("VM started with network interfaces and addresses")

consoleURL := ""
for _, ifc := range m.Interfaces {
if ifc.Console != nil {
@@ -323,7 +367,7 @@ func (m *Machine) waitAndAdjOom(ctx context.Context, name string, socket string)
ctx,
),
func(err error, d time.Duration) {
log.Debug().Err(err).Str("id", name).Msg("vm is not up yet")
log.Info().Err(err).Str("id", name).Msg("vm is not up yet")
}); err != nil {

return err
3 changes: 3 additions & 0 deletions pkg/vm/machine.go
@@ -9,6 +9,7 @@ import (

"github.com/pkg/errors"
"github.com/rs/zerolog/log"
"github.com/threefoldtech/zosbase/pkg"
"github.com/vishvananda/netlink"
)

@@ -138,6 +139,8 @@ type Machine struct {
// NoKeepAlive is not used by firecracker, but instead a marker
// for the vm mananger to not restart the machine when it stops
NoKeepAlive bool `json:"no-keep-alive"`
// NetworkInfo holds the full network configuration with IPs (not serialized to config file)
NetworkInfo *pkg.VMNetworkInfo `json:"-"`
}

// Save saves a machine into a file
80 changes: 77 additions & 3 deletions pkg/vm/manager.go
@@ -551,6 +551,7 @@ func (m *Module) Run(vm pkg.VM) (pkg.MachineInfo, error) {
Disks: disks,
Devices: vm.Devices,
NoKeepAlive: vm.NoKeepAlive,
NetworkInfo: &vm.Network,
}

log.Debug().Str("name", vm.Name).Msg("saving machine")
@@ -569,6 +570,78 @@ func (m *Module) Run(vm pkg.VM) (pkg.MachineInfo, error) {
m.failures.Set(vm.Name, permanent, cache.NoExpiration)
}

// Log comprehensive VM configuration before starting
logEvent := log.Info().
Str("vm-id", vm.Name).
Str("hostname", vm.Hostname).
Uint8("cpu", uint8(vm.CPU)).
Uint64("memory-bytes", uint64(vm.Memory))

// Log network interfaces with IPs and MACs
for idx, nic := range nics {
Contributor:
we can implement a serializer for the interface, to construct a consistent printable string, instead of duplicating in ch.go
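A hedged sketch of that suggestion, for illustration only: give the interface config type one serializer and reuse it from both manager.go and ch.go. The receiver type name (VMIface) is an assumption; the fields used are the ones this diff already reads (IPs, IP4DefaultGateway, IP6DefaultGateway, PublicIPv4, PublicIPv6, NetID). Needs "fmt" and "strings".

// sketch: a single String() on the interface config type, shared by both call sites
func (i VMIface) String() string {
	ips := make([]string, 0, len(i.IPs))
	for _, ip := range i.IPs {
		ips = append(ips, ip.String())
	}
	s := "ips=" + strings.Join(ips, ",")
	if i.IP4DefaultGateway != nil {
		s += " gw4=" + i.IP4DefaultGateway.String()
	}
	if i.IP6DefaultGateway != nil {
		s += " gw6=" + i.IP6DefaultGateway.String()
	}
	if i.PublicIPv4 || i.PublicIPv6 {
		s += fmt.Sprintf(" public4=%t public6=%t", i.PublicIPv4, i.PublicIPv6)
	}
	if i.NetID != "" {
		s += " net=" + string(i.NetID)
	}
	return s
}

// usage at both call sites, e.g.:
// logEvent = logEvent.Str(fmt.Sprintf("interface-%d", idx), vm.Network.Ifaces[idx].String())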

logEvent = logEvent.
Str(fmt.Sprintf("interface-%d-tap", idx), nic.Tap).
Str(fmt.Sprintf("interface-%d-mac", idx), nic.Mac)

// Log console info if available
if nic.Console != nil {
logEvent = logEvent.
Str(fmt.Sprintf("interface-%d-console-ip", idx), nic.Console.ListenAddress.IP.String()).
Str(fmt.Sprintf("interface-%d-vm-ip", idx), nic.Console.VmAddress.IP.String()).
Str(fmt.Sprintf("interface-%d-namespace", idx), nic.Console.Namespace)
}

// Find corresponding interface config to get IPs
if idx < len(vm.Network.Ifaces) {
ifcfg := vm.Network.Ifaces[idx]
if len(ifcfg.IPs) > 0 {
ips := make([]string, len(ifcfg.IPs))
for i, ip := range ifcfg.IPs {
ips[i] = ip.String()
}
logEvent = logEvent.Strs(fmt.Sprintf("interface-%d-ips", idx), ips)
}
// Log gateways if present
if ifcfg.IP4DefaultGateway != nil {
logEvent = logEvent.Str(fmt.Sprintf("interface-%d-gw4", idx), ifcfg.IP4DefaultGateway.String())
}
if ifcfg.IP6DefaultGateway != nil {
logEvent = logEvent.Str(fmt.Sprintf("interface-%d-gw6", idx), ifcfg.IP6DefaultGateway.String())
}
// Log if this is a public interface
if ifcfg.PublicIPv4 {
logEvent = logEvent.Bool(fmt.Sprintf("interface-%d-public-ipv4", idx), true)
}
if ifcfg.PublicIPv6 {
logEvent = logEvent.Bool(fmt.Sprintf("interface-%d-public-ipv6", idx), true)
}
// Log network ID for private networks
if ifcfg.NetID != "" {
logEvent = logEvent.Str(fmt.Sprintf("interface-%d-network-id", idx), string(ifcfg.NetID))
}
}
}

// Log mounts
if len(cfg.Mounts) > 0 {
mountPaths := make([]string, len(cfg.Mounts))
for i, mnt := range cfg.Mounts {
mountPaths[i] = fmt.Sprintf("%s->%s", mnt.Source, mnt.Target)
}
logEvent = logEvent.Strs("mounts", mountPaths)
}

// Log disks
if len(disks) > 0 {
diskPaths := make([]string, len(disks))
for i, disk := range disks {
diskPaths[i] = disk.Path
}
logEvent = logEvent.Strs("disks", diskPaths)
}

logEvent.Msg("starting VM with full configuration")

machineInfo, err := machine.Run(ctx, m.socketPath(vm.Name), m.logsPath(vm.Name))
if err != nil {
return pkg.MachineInfo{}, m.withLogs(m.logsPath(vm.Name), err)
@@ -615,6 +688,7 @@ func (m *Module) removeConfig(name string) {

// Delete deletes a machine by name (id)
func (m *Module) Delete(name string) error {
log.Info().Str("vm-id", name).Msg("deleting VM")
defer m.failures.Delete(name)

// before we do anything we set failures to permanent to prevent monitoring from trying
@@ -649,7 +723,7 @@ func (m *Module) Delete(name string) error {
killAfter = 10 * time.Second
)

log.Debug().Str("name", name).Msg("shutting vm down [client]")
log.Info().Str("name", name).Msg("shutting vm down [client]")
if err := client.Shutdown(ctx); err != nil {
log.Error().Err(err).Str("name", name).Msg("failed to shutdown machine")
}
Expand All @@ -659,13 +733,13 @@ func (m *Module) Delete(name string) error {
return nil
}

log.Debug().Str("name", name).Msg("shutting vm down [sigterm]")
log.Info().Str("name", name).Msg("shutting vm down [sigterm]")
if time.Since(now) > termAfter {
_ = syscall.Kill(ps.Pid, syscall.SIGTERM)
}

if time.Since(now) > killAfter {
log.Debug().Str("name", name).Msg("shutting vm down [sigkill]")
log.Info().Str("name", name).Msg("shutting vm down [sigkill]")
_ = syscall.Kill(ps.Pid, syscall.SIGKILL)
break
}