Skip to content

Commit c21bbd4

Browse files
committed
Refactor unified memory computation and sanitize process memory usage
1 parent b281c38 commit c21bbd4

File tree

2 files changed

+43
-109
lines changed

2 files changed

+43
-109
lines changed

src/extract_gpuinfo.c

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,9 @@ bool gpuinfo_fix_dynamic_info_from_process_info(struct list_head *devices) {
140140
// Update them here since per-process sysfs exposes this information.
141141
bool needGpuEncode = !GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, encoder_rate);
142142
bool needGpuDecode = !GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, decoder_rate);
143-
if (needGpuRate || needGpuEncode || needGpuDecode) {
143+
bool needGPUMemory = !GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory) &&
144+
GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, total_memory);
145+
if (needGpuRate || needGpuEncode || needGpuDecode || needGPUMemory) {
144146
for (unsigned processIdx = 0; processIdx < device->processes_count; ++processIdx) {
145147
struct gpu_process *process_info = &device->processes[processIdx];
146148
if (needGpuRate && GPUINFO_PROCESS_FIELD_VALID(process_info, gpu_usage)) {
@@ -164,8 +166,28 @@ bool gpuinfo_fix_dynamic_info_from_process_info(struct list_head *devices) {
164166
SET_GPUINFO_DYNAMIC(dynamic_info, decoder_rate, MYMIN(100, process_info->decode_usage));
165167
}
166168
}
169+
if (needGPUMemory && GPUINFO_PROCESS_FIELD_VALID(process_info, gpu_memory_usage)) {
170+
if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory)) {
171+
dynamic_info->used_memory += dynamic_info->used_memory + process_info->gpu_memory_usage;
172+
} else {
173+
SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, process_info->gpu_memory_usage);
174+
}
175+
}
167176
}
168177
}
178+
// Sanitize what we got from processes: we can't have more than the total!
179+
if (needGPUMemory && GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory) &&
180+
GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, total_memory) &&
181+
dynamic_info->used_memory > dynamic_info->total_memory) {
182+
RESET_GPUINFO_DYNAMIC(dynamic_info, used_memory);
183+
}
184+
if (needGPUMemory && !GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, free_memory) &&
185+
GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory) &&
186+
GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, total_memory)) {
187+
// We already checked that used_memory <= total_memory so no underflow can happen here
188+
unsigned long long free = dynamic_info->total_memory - dynamic_info->used_memory;
189+
SET_GPUINFO_DYNAMIC(dynamic_info, free_memory, free);
190+
}
169191
if (!GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, gpu_util_rate) && validReportedGpuRate) {
170192
SET_GPUINFO_DYNAMIC(dynamic_info, gpu_util_rate, reportedGpuRate);
171193
} else if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, gpu_util_rate) && validReportedGpuRate) {
@@ -228,14 +250,18 @@ static void gpuinfo_populate_process_info(struct gpu_info *device) {
228250
} else {
229251
cached_pid_info->last_total_consumed_cpu_time = -1;
230252
}
231-
232253
// Process memory usage percent of total device memory
233254
if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, total_memory) &&
234255
GPUINFO_PROCESS_FIELD_VALID(&device->processes[j], gpu_memory_usage)) {
235-
double percentage = fmin(
236-
round(100. * ((double)device->processes[j].gpu_memory_usage / (double)device->dynamic_info.total_memory)),
237-
100.);
238-
SET_GPUINFO_PROCESS(&device->processes[j], gpu_memory_percentage, (unsigned)percentage);
256+
// Sanitize process inputs
257+
if (device->dynamic_info.total_memory < device->processes[j].gpu_memory_usage) {
258+
RESET_GPUINFO_PROCESS(&device->processes[j], gpu_memory_usage);
259+
} else {
260+
double percentage = fmin(
261+
round(100. * ((double)device->processes[j].gpu_memory_usage / (double)device->dynamic_info.total_memory)),
262+
100.);
263+
SET_GPUINFO_PROCESS(&device->processes[j], gpu_memory_percentage, (unsigned)percentage);
264+
}
239265
}
240266
}
241267
}

src/extract_gpuinfo_nvidia.c

Lines changed: 11 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -616,10 +616,10 @@ static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info) {
616616
last_nvml_return_status = nvmlDeviceGetMemoryInfo_v2(device, &memory_info);
617617
if (last_nvml_return_status == NVML_SUCCESS) {
618618
// Check if this is a unified memory GPU (total == 0 indicates unified memory)
619+
got_meminfo = true;
619620
if (memory_info.total == 0) {
620621
has_unified_memory = true;
621622
} else {
622-
got_meminfo = true;
623623
SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, memory_info.total);
624624
SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, memory_info.used);
625625
SET_GPUINFO_DYNAMIC(dynamic_info, free_memory, memory_info.free);
@@ -629,10 +629,11 @@ static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info) {
629629
// From the NVML documentation:
630630
// On certain SOC platforms, the integrated GPU (iGPU) does not use a dedicated framebuffer but instead shares
631631
// memory with the system. As a result, NVML_ERROR_NOT_SUPPORTED will be returned in this case.
632+
got_meminfo = true;
632633
has_unified_memory = true;
633634
}
634635
}
635-
if (!got_meminfo && !has_unified_memory && nvmlDeviceGetMemoryInfo) {
636+
if (!got_meminfo && nvmlDeviceGetMemoryInfo) {
636637
nvmlMemory_v1_t memory_info;
637638
last_nvml_return_status = nvmlDeviceGetMemoryInfo(device, &memory_info);
638639
if (last_nvml_return_status == NVML_SUCCESS) {
@@ -653,120 +654,27 @@ static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info) {
653654
}
654655
}
655656

656-
// Handle unified memory GPUs - query actual GPU allocations and system memory
657+
// Handle unified memory GPUs - query system memory
657658
if (has_unified_memory) {
658-
// Get actual GPU memory usage from running processes
659-
unsigned long long gpu_used_memory = 0;
660-
661-
// Sum up memory used by compute processes
662-
if (nvmlDeviceGetComputeRunningProcesses_v3 || nvmlDeviceGetComputeRunningProcesses_v2 ||
663-
nvmlDeviceGetComputeRunningProcesses_v1) {
664-
unsigned int process_count = 0;
665-
nvmlReturn_t (*getProcesses)(nvmlDevice_t, unsigned int *, void *) = NULL;
666-
size_t process_info_size = 0;
667-
668-
// Choose the latest available version
669-
if (nvmlDeviceGetComputeRunningProcesses_v3) {
670-
getProcesses = nvmlDeviceGetComputeRunningProcesses[3];
671-
process_info_size = sizeof(nvmlProcessInfo_v3_t);
672-
} else if (nvmlDeviceGetComputeRunningProcesses_v2) {
673-
getProcesses = nvmlDeviceGetComputeRunningProcesses[2];
674-
process_info_size = sizeof(nvmlProcessInfo_v2_t);
675-
} else {
676-
getProcesses = nvmlDeviceGetComputeRunningProcesses[1];
677-
process_info_size = sizeof(nvmlProcessInfo_v1_t);
678-
}
679-
680-
// First call to get count
681-
nvmlReturn_t ret = getProcesses(device, &process_count, NULL);
682-
if (ret == NVML_SUCCESS || ret == NVML_ERROR_INSUFFICIENT_SIZE) {
683-
if (process_count > 0) {
684-
void *process_infos = malloc(process_count * process_info_size);
685-
if (process_infos) {
686-
ret = getProcesses(device, &process_count, process_infos);
687-
if (ret == NVML_SUCCESS) {
688-
// Sum up memory from all processes
689-
for (unsigned int i = 0; i < process_count; i++) {
690-
if (nvmlDeviceGetComputeRunningProcesses_v3) {
691-
gpu_used_memory += ((nvmlProcessInfo_v3_t *)process_infos)[i].usedGpuMemory;
692-
} else if (nvmlDeviceGetComputeRunningProcesses_v2) {
693-
gpu_used_memory += ((nvmlProcessInfo_v2_t *)process_infos)[i].usedGpuMemory;
694-
} else {
695-
gpu_used_memory += ((nvmlProcessInfo_v1_t *)process_infos)[i].usedGpuMemory;
696-
}
697-
}
698-
}
699-
free(process_infos);
700-
}
701-
}
702-
}
703-
}
704-
705-
// Also check graphics processes
706-
if (nvmlDeviceGetGraphicsRunningProcesses_v3 || nvmlDeviceGetGraphicsRunningProcesses_v2 ||
707-
nvmlDeviceGetGraphicsRunningProcesses_v1) {
708-
unsigned int process_count = 0;
709-
nvmlReturn_t (*getProcesses)(nvmlDevice_t, unsigned int *, void *) = NULL;
710-
size_t process_info_size = 0;
711-
712-
if (nvmlDeviceGetGraphicsRunningProcesses_v3) {
713-
getProcesses = nvmlDeviceGetGraphicsRunningProcesses[3];
714-
process_info_size = sizeof(nvmlProcessInfo_v3_t);
715-
} else if (nvmlDeviceGetGraphicsRunningProcesses_v2) {
716-
getProcesses = nvmlDeviceGetGraphicsRunningProcesses[2];
717-
process_info_size = sizeof(nvmlProcessInfo_v2_t);
718-
} else {
719-
getProcesses = nvmlDeviceGetGraphicsRunningProcesses[1];
720-
process_info_size = sizeof(nvmlProcessInfo_v1_t);
721-
}
722-
723-
nvmlReturn_t ret = getProcesses(device, &process_count, NULL);
724-
if (ret == NVML_SUCCESS || ret == NVML_ERROR_INSUFFICIENT_SIZE) {
725-
if (process_count > 0) {
726-
void *process_infos = malloc(process_count * process_info_size);
727-
if (process_infos) {
728-
ret = getProcesses(device, &process_count, process_infos);
729-
if (ret == NVML_SUCCESS) {
730-
for (unsigned int i = 0; i < process_count; i++) {
731-
if (nvmlDeviceGetGraphicsRunningProcesses_v3) {
732-
gpu_used_memory += ((nvmlProcessInfo_v3_t *)process_infos)[i].usedGpuMemory;
733-
} else if (nvmlDeviceGetGraphicsRunningProcesses_v2) {
734-
gpu_used_memory += ((nvmlProcessInfo_v2_t *)process_infos)[i].usedGpuMemory;
735-
} else {
736-
gpu_used_memory += ((nvmlProcessInfo_v1_t *)process_infos)[i].usedGpuMemory;
737-
}
738-
}
739-
}
740-
free(process_infos);
741-
}
742-
}
743-
}
744-
}
745-
746659
// Read MemAvailable from /proc/meminfo for available memory
747660
FILE *meminfo = fopen("/proc/meminfo", "r");
748661
if (meminfo) {
749-
unsigned long long available_ram = 0;
662+
unsigned long long total_memory = 0;
750663
char line[256];
751664

752665
while (fgets(line, sizeof(line), meminfo)) {
753-
if (sscanf(line, "MemAvailable: %llu kB", &available_ram) == 1) {
754-
available_ram *= 1024; // Convert KB to bytes
666+
if (sscanf(line, "MemTotal: %llu kB", &total_memory) == 1) {
667+
total_memory *= 1024; // Convert KB to bytes
755668
break;
756669
}
757670
}
758671
fclose(meminfo);
759672

760-
if (available_ram > 0) {
761-
unsigned long long total_memory = gpu_used_memory + available_ram;
762-
673+
// The used memory will be computed from process infos as part of the
674+
// fixup function gpuinfo_fix_dynamic_info_from_process_info from
675+
// extract_gpuinfo.c
676+
if (total_memory > 0)
763677
SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, total_memory);
764-
SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, gpu_used_memory);
765-
SET_GPUINFO_DYNAMIC(dynamic_info, free_memory, available_ram);
766-
if (total_memory > 0) {
767-
SET_GPUINFO_DYNAMIC(dynamic_info, mem_util_rate, gpu_used_memory * 100 / total_memory);
768-
}
769-
}
770678
}
771679
}
772680

0 commit comments

Comments
 (0)