@@ -52,6 +52,11 @@ type NodeServer struct {
5252
5353 // node ID of this node server
5454 nodeID string
55+
56+ // mountCache is a cache of currently mounted NVMe devices, used to determine
57+ // when it's safe to disconnect a device.
58+ // It is initialized on startup and updated on each stage/unstage operation.
59+ mountCache nvmeutil.MountCache
5560}
5661
5762// ConnectionInfo holds NVMe-oF connection details.
@@ -102,13 +107,20 @@ func NewNodeServer(
102107 volumeLocks : util .NewIDLocker (),
103108 securityKeys : nil , // Initialize lazily when needed
104109 nodeID : nodeID ,
110+ mountCache : nvmeutil .NewMountCache (),
105111 }
106112
107113 // Load nvme kernel modules
108114 if err := nvmeInitiator .LoadKernelModules (context .Background ()); err != nil {
109115 return nil , fmt .Errorf ("failed to load NVMe kernel modules: %w" , err )
110116 }
111117
118+ // Initialize mounted devices cache on startup to ensure we have an accurate view
119+ if err := ns .initNVMeMountedDevices (context .Background ()); err != nil {
120+ log .WarningLog (context .Background (),
121+ "failed to initialize mounted devices cache on startup: %v (mounted devices cache will be empty)" , err )
122+ }
123+
112124 return ns , nil
113125}
114126
@@ -327,7 +339,15 @@ func (ns *NodeServer) NodeUnstageVolume(
327339
328340 stagingTargetPath := getStagingTargetPath (req )
329341
330- devPath := getDeviceFromStagingPath (ctx , stagingTargetPath )
342+ devPath , exists := ns .mountCache .GetDevice (stagingTargetPath )
343+ if ! exists {
344+ log .WarningLog (ctx , "device not in cache for %s, querying system" , stagingTargetPath )
345+ // Fallback to findmnt if not in cache
346+ devPath , err = nvmeutil .GetDeviceFromMountpoint (ctx , stagingTargetPath )
347+ if err != nil {
348+ log .WarningLog (ctx , "failed to get device from mountpoint for %s: %v" , stagingTargetPath , err )
349+ }
350+ }
331351
332352 isMnt , err := ns .Mounter .IsMountPoint (stagingTargetPath )
333353 if err != nil {
@@ -365,13 +385,13 @@ func (ns *NodeServer) NodeUnstageVolume(
365385 // Non-fatal - a failed disconnect just means the connection lingers until the
366386 // kernel's ctrl_loss_tmo expires or the next reconnect cycle.
367387 if devPath != "" {
368- mountedDevices , err := getNVMeMountedDevices ( ctx )
369- if err != nil {
370- log . WarningLog ( ctx , "failed to get mounted devices: %v (skipping disconnect)" , err )
371- } else {
372- if err := ns .initiator . DisconnectIfLastMount ( ctx , devPath , mountedDevices ); err != nil {
373- log . WarningLog ( ctx , "failed to disconnect controller for device %s: %v" , devPath , err )
374- }
388+ // Remove from cache
389+ ns . mountCache . RemoveByDevice ( devPath )
390+
391+ // Get remaining devices
392+ remainingDevices := ns .mountCache . GetCopyAllDevices ()
393+ if err := ns . initiator . DisconnectIfLastMount ( ctx , devPath , remainingDevices ); err != nil {
394+ log . WarningLog ( ctx , "failed to disconnect controller for device %s: %v" , devPath , err )
375395 }
376396 }
377397
@@ -489,6 +509,16 @@ func (ns *NodeServer) stageTransaction(
489509 }
490510 transaction .isMounted = true
491511
512+ // Resolve real device and update cache
513+ realDevice , err := nvmeutil .GetDeviceFromMountpoint (ctx , stagingTargetPath )
514+ if err != nil {
515+ log .WarningLog (ctx , "failed to resolve device: %v" , err )
516+ // Fallback - try to continue
517+ realDevice = devicePath
518+ }
519+
520+ ns .mountCache .Add (realDevice , stagingTargetPath )
521+
492522 return transaction , nil
493523}
494524
@@ -520,16 +550,17 @@ func (ns *NodeServer) undoStagingTransaction(
520550 // continue on failure to disconnect the image
521551 }
522552 }
523- // disconnect if we connected
553+
554+ // try to remove from cache if we created the staging path - idempotent.
555+ ns .mountCache .RemoveByMountPoint (stagingTargetPath )
556+
557+ // if devicePath is not empty, it means we connected to the subsystem,
558+ // so we should try to disconnect if this was the last mount
524559 if transaction .devicePath != "" {
525- mountedDevices , err := getNVMeMountedDevices (ctx )
526- if err != nil {
527- log .WarningLog (ctx , "failed to get mounted devices during rollback: %v (skipping disconnect)" , err )
528- } else {
529- if err := ns .initiator .DisconnectIfLastMount (ctx , transaction .devicePath , mountedDevices ); err != nil {
530- log .WarningLog (ctx , "failed to disconnect during rollback for device %s: %v" ,
531- transaction .devicePath , err )
532- }
560+ mountedDevices := ns .mountCache .GetCopyAllDevices ()
561+ if err := ns .initiator .DisconnectIfLastMount (ctx , transaction .devicePath , mountedDevices ); err != nil {
562+ log .WarningLog (ctx , "failed to disconnect during rollback for device %s: %v" ,
563+ transaction .devicePath , err )
533564 }
534565 }
535566}
@@ -776,6 +807,21 @@ func (ns *NodeServer) mountVolumeToStagePath(
776807 return err
777808}
778809
810+ // initNVMeMountedDevices initializes the mount cache with currently mounted NVMe devices on startup.
811+ func (ns * NodeServer ) initNVMeMountedDevices (ctx context.Context ) error {
812+ mountedDevices , err := nvmeutil .GetAllNVMeMountedDevices (ctx )
813+ if err != nil {
814+ return err
815+ }
816+
817+ // Add mounted devices to cache
818+ for device , mountPoint := range mountedDevices {
819+ ns .mountCache .Add (device , mountPoint )
820+ }
821+
822+ return nil
823+ }
824+
779825// getStagingTargetPath concats either NodeStageVolumeRequest's or
780826// NodeUnstageVolumeRequest's target path with the volumeID.
781827func getStagingTargetPath (req interface {}) string {
@@ -800,8 +846,3 @@ func getDeviceFromStagingPath(ctx context.Context, stagingTargetPath string) str
800846
801847 return device
802848}
803-
804- // getNVMeMountedDevices returns a map of all currently mounted NVMe devices.
805- func getNVMeMountedDevices (ctx context.Context ) (map [string ]string , error ) {
806- return nvmeutil .GetAllNVMeMountedDevices (ctx )
807- }
0 commit comments