Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions pkg/plugin/nvidia/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,20 @@ func (m *NvidiaDevicePlugin) DevicesNum() int {

// Serve starts the gRPC server of the device plugin.
func (m *NvidiaDevicePlugin) Serve() error {

sock, err := net.Listen("unix", m.socket)
if err != nil {
return err
log.Printf("Listen sock fail and retry for '%s': %s", m.resourceName, err)
err = os.Remove(m.socket)
if err != nil {
log.Printf("Error deleting file: %s, %v\n", m.socket, err)
return err
}
sock, err = net.Listen("unix", m.socket)
if err != nil {
log.Printf("Retry Listen sock fail '%s': %s", m.resourceName, err)
return err
}
}

pluginapi.RegisterDevicePluginServer(m.server, m)
Expand Down Expand Up @@ -343,6 +354,7 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.Alloc
}

sort.Sort(availablePods)
util.UseClient(m.kubeInteractor.clientset)

Comment on lines +357 to 358
Copy link

Copilot AI Mar 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

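Please confirm that moving the util.UseClient call here, right after sort.Sort(availablePods), is intentional, and that removing it from its original location immediately before util.ReleaseNodeLock does not cause side effects or leave any code path (for example, one that reaches the lock release without passing through this line) running without the client initialized.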

Suggested change
util.UseClient(m.kubeInteractor.clientset)

Copilot uses AI. Check for mistakes.
var candidatePod *v1.Pod
for _, pod := range availablePods {
Expand Down Expand Up @@ -406,7 +418,6 @@ Allocate:
return nil, fmt.Errorf("failed to update pod annotation %v", err)
}

util.UseClient(m.kubeInteractor.clientset)
klog.V(3).Infoln("Releasing lock: nodeName=", m.kubeInteractor.nodeName)
err = util.ReleaseNodeLock(m.kubeInteractor.nodeName, "gpu")
if err != nil {
Expand Down
Loading