Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,5 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)

replace github.com/NVIDIA/go-nvml => ../go-nvml
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ cyphar.com/go-pathrs v0.2.1 h1:9nx1vOgwVvX1mNBWDu93+vaceedpbsDqo+XuBGL40b8=
cyphar.com/go-pathrs v0.2.1/go.mod h1:y8f1EMG7r+hCuFf/rXsKqMJrJAUoADZGNh5/vZPKcGc=
github.com/NVIDIA/go-nvlib v0.9.1-0.20251202135446-d0f42ba016dd h1:MC1w/VYuo9Zt0se4SSx9BVid4a46ai+voN3knRvVWjE=
github.com/NVIDIA/go-nvlib v0.9.1-0.20251202135446-d0f42ba016dd/go.mod h1:7mzx9FSdO9fXWP9NKuZmWkCwhkEcSWQFe2tmFwtLb9c=
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
Expand Down
11 changes: 11 additions & 0 deletions internal/platform-support/tegra/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ type matcherAsFilter struct {
}

type filterByMountSpecType map[csv.MountSpecType]filter
// filterByMountSpecPathsByTyper embeds a MountSpecPathsByTyper so that it can
// be applied as a filter: its Apply method turns the embedded paths, grouped by
// mount spec type, into the path patterns of a per-type filter.
type filterByMountSpecPathsByTyper struct {
MountSpecPathsByTyper
}

type pathPatterns []string
type pathPattern string
Expand Down Expand Up @@ -125,6 +128,14 @@ func (p filterByMountSpecType) Apply(input MountSpecPathsByTyper) MountSpecPaths
return ms
}

// Apply filters the input using the paths of the embedded
// MountSpecPathsByTyper. For every mount spec type reported by the embedded
// value, its paths are converted to path patterns and registered as the filter
// for that type; the resulting filterByMountSpecType is then applied to input.
func (p filterByMountSpecPathsByTyper) Apply(input MountSpecPathsByTyper) MountSpecPathsByTyper {
	byType := make(filterByMountSpecType)
	for specType, paths := range p.MountSpecPathsByType() {
		byType[specType] = &matcherAsFilter{pathPatterns(paths)}
	}
	return byType.Apply(input)
}

// apply uses a matcher to filter an input string.
// Each element in the input that matches is skipped and the remaining elements
// are returned.
Expand Down
4 changes: 4 additions & 0 deletions internal/platform-support/tegra/mount_specs.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ func WithoutDeviceNodes() Transformer {
}
}

// Without creates a transformer that wraps m in a
// filterByMountSpecPathsByTyper, so that the paths in m (grouped by mount spec
// type) act as the filter patterns applied to the source.
func Without(m MountSpecPathsByTyper) Transformer {
	return filterByMountSpecPathsByTyper{MountSpecPathsByTyper: m}
}

// WithoutRegularDeviceNodes creates a transformer which removes
// regular `/dev/nvidia[0-9]+` device nodes from the source.
func WithoutRegularDeviceNodes() Transformer {
Expand Down
154 changes: 114 additions & 40 deletions pkg/nvcdi/lib-csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,7 @@ var _ deviceSpecGeneratorFactory = (*csvlib)(nil)
// If NVML is not available or the disable-multiple-csv-devices feature flag is
// enabled, a single device is assumed.
func (l *csvlib) DeviceSpecGenerators(ids ...string) (DeviceSpecGenerator, error) {
if l.featureFlags[FeatureDisableMultipleCSVDevices] {
return l.purecsvDeviceSpecGenerators(ids...)
}
hasNVML, _ := l.infolib.HasNvml()
if !hasNVML {
if l.usePureCSVDeviceSpecGenerator() {
return l.purecsvDeviceSpecGenerators(ids...)
}
mixed, err := l.mixedDeviceSpecGenerators(ids...)
Expand All @@ -61,6 +57,29 @@ func (l *csvlib) DeviceSpecGenerators(ids ...string) (DeviceSpecGenerator, error
return mixed, nil
}

// usePureCSVDeviceSpecGenerator reports whether the pure CSV device spec
// generator should be used instead of the mixed one. It returns true when:
//   - the FeatureDisableMultipleCSVDevices feature flag is set,
//   - NVML is reported as unavailable,
//   - NVML cannot be initialized,
//   - the device count cannot be queried, or
//   - at most one device is reported.
func (l *csvlib) usePureCSVDeviceSpecGenerator() bool {
	if l.featureFlags[FeatureDisableMultipleCSVDevices] {
		return true
	}
	if available, _ := l.infolib.HasNvml(); !available {
		return true
	}

	// NVML must be initialized before the device count can be queried; it is
	// shut down again once this check completes.
	lib := (*nvmllib)(l)
	if err := lib.init(); err != nil {
		return true
	}
	defer lib.tryShutdown()

	count, ret := l.nvmllib.DeviceGetCount()
	return ret != nvml.SUCCESS || count <= 1
}

func (l *csvlib) purecsvDeviceSpecGenerators(ids ...string) (DeviceSpecGenerator, error) {
for _, id := range ids {
switch id {
Expand Down Expand Up @@ -88,6 +107,7 @@ type csvDeviceGenerator struct {
*csvlib
index int
uuid string
mode string
}

func (l *csvDeviceGenerator) GetUUID() (string, error) {
Expand Down Expand Up @@ -131,30 +151,63 @@ func (l *csvDeviceGenerator) GetDeviceSpecs() ([]specs.Device, error) {
// - The device node (i.e. /dev/nvidia{{ .index }}) associated with this
// particular device is added to the set of device nodes to be discovered.
func (l *csvDeviceGenerator) deviceNodeDiscoverer() (discover.Discover, error) {
mountSpecs := tegra.Transform(
tegra.Transform(
tegra.MountSpecsFromCSVFiles(l.logger, l.csvFiles...),
// We remove non-device nodes.
tegra.OnlyDeviceNodes(),
),
// We remove the regular (nvidia[0-9]+) device nodes.
tegra.WithoutRegularDeviceNodes(),
)
return tegra.New(
tegra.WithLogger(l.logger),
tegra.WithDriverRoot(l.driverRoot),
tegra.WithDevRoot(l.devRoot),
tegra.WithHookCreator(l.hookCreator),
tegra.WithLdconfigPath(l.ldconfigPath),
tegra.WithLibrarySearchPaths(l.librarySearchPaths...),
tegra.WithMountSpecs(
mountSpecs,
// We add the specific device node for this device.
tegra.DeviceNodes(fmt.Sprintf("/dev/nvidia%d", l.index)),
),
tegra.WithMountSpecs(l.deviceNodeMountSpecs()),
)
}

// deviceNodeMountSpecs returns the device node mount specs for this generator.
// The mount specs are read from the configured CSV files, restricted to device
// nodes, and then adjusted according to the generator's mode ("dgpu" or
// "igpu"). For any other mode the device nodes are returned unmodified.
func (l *csvDeviceGenerator) deviceNodeMountSpecs() tegra.MountSpecPathsByTyper {
	// Start from the CSV-defined mount specs with all non-device nodes removed.
	deviceNodes := tegra.Transform(
		tegra.MountSpecsFromCSVFiles(l.logger, l.csvFiles...),
		tegra.OnlyDeviceNodes(),
	)

	if l.mode == "dgpu" {
		// For a dGPU the regular (nvidia[0-9]+) device nodes are dropped from
		// the CSV-derived list, since the device nodes for the GPU are
		// discovered for the full GPU.
		withoutRegular := tegra.Transform(deviceNodes, tegra.WithoutRegularDeviceNodes())
		// The control device nodes are also ignored since these are included
		// in the full GPU spec generator.
		controlDeviceNodes := tegra.DeviceNodes(
			"/dev/nvidia-modeset",
			"/dev/nvidia-uvm-tools",
			"/dev/nvidia-uvm",
			"/dev/nvidiactl",
		)
		return tegra.Transform(withoutRegular, tegra.Without(controlDeviceNodes))
	}

	if l.mode == "igpu" {
		// The /dev/nvidia1 device node is removed.
		// TODO: This assumes that the dGPU has index 1 and removes it from
		// the set of device nodes.
		withoutDGPU := tegra.Transform(
			deviceNodes,
			tegra.Without(tegra.DeviceNodes("/dev/nvidia1")),
		)
		// The display device from the iGPU is added.
		return tegra.Merge(withoutDGPU, tegra.DeviceNodes("/dev/nvidia2"))
	}

	return deviceNodes
}

// GetCommonEdits generates a CDI specification that can be used for ANY devices
// These explicitly do not include any device nodes.
func (l *csvlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
Expand Down Expand Up @@ -246,35 +299,56 @@ func (l *mixedcsvlib) csvDeviceSpecGenerator(index int, uuid string, device nvml
return nil, fmt.Errorf("is-integrated check failed for device (index=%v,uuid=%v)", index, uuid)
}

if isIntegrated {
return l.iGPUDeviceSpecGenerator(index, uuid)
}

return l.dGPUDeviceSpecGenerator(index, uuid, device)
}

func (l *mixedcsvlib) dGPUDeviceSpecGenerator(index int, uuid string, device nvml.Device) (DeviceSpecGenerator, error) {
if index != 1 {
return nil, fmt.Errorf("unexpected device index for dGPU: %d", index)
}
g := &csvDeviceGenerator{
csvlib: (*csvlib)(l),
index: index,
uuid: uuid,
mode: "dgpu",
}

if !isIntegrated {
csvDeviceNodeDiscoverer, err := g.deviceNodeDiscoverer()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for devices nodes: %w", err)
}

// If this is not an integrated GPU, we also create a spec generator for
// the full GPU.
dgpu := (*nvmllib)(l).withInit(&fullGPUDeviceSpecGenerator{
nvmllib: (*nvmllib)(l),
uuid: uuid,
index: index,
// For the CSV case, we include the control device nodes at a
// device level.
additionalDiscoverers: []discover.Discover{
(*nvmllib)(l).controlDeviceNodeDiscoverer(),
csvDeviceNodeDiscoverer,
},
featureFlags: l.featureFlags,
})
return dgpu, nil
csvDeviceNodeDiscoverer, err := g.deviceNodeDiscoverer()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for devices nodes: %w", err)
}

// If this is not an integrated GPU, we also create a spec generator for
// the full GPU.
dgpu := (*nvmllib)(l).withInit(&fullGPUDeviceSpecGenerator{
nvmllib: (*nvmllib)(l),
uuid: uuid,
index: index,
// For the CSV case, we include the control device nodes at a
// device level.
additionalDiscoverers: []discover.Discover{
(*nvmllib)(l).controlDeviceNodeDiscoverer(),
csvDeviceNodeDiscoverer,
},
featureFlags: l.featureFlags,
})
return dgpu, nil
}

// iGPUDeviceSpecGenerator returns a CSV-based device spec generator in "igpu"
// mode for the device with the given index and UUID. Only index 0 is accepted;
// any other index results in an error.
func (l *mixedcsvlib) iGPUDeviceSpecGenerator(index int, uuid string) (DeviceSpecGenerator, error) {
	if index != 0 {
		return nil, fmt.Errorf("unexpected device index for iGPU: %d", index)
	}
	return &csvDeviceGenerator{
		csvlib: (*csvlib)(l),
		index:  index,
		uuid:   uuid,
		mode:   "igpu",
	}, nil
}

Expand Down
Loading
Loading