From 875d69e9a9dc4201251aadb3ff4b81156e6633e7 Mon Sep 17 00:00:00 2001 From: Alessandro Sidero Date: Tue, 30 Dec 2025 17:40:35 +0100 Subject: [PATCH 1/9] Enhance CSV parsing in metrics parser and update histogram/occupancy plots for better precision --- profiling_tools/parse_metrics.py | 219 ++++++++++++++++++++---------- profiling_tools/plot_histogram.gp | 2 +- profiling_tools/plot_occupancy.gp | 6 +- 3 files changed, 154 insertions(+), 73 deletions(-) diff --git a/profiling_tools/parse_metrics.py b/profiling_tools/parse_metrics.py index 8a77ef2..ea56a48 100644 --- a/profiling_tools/parse_metrics.py +++ b/profiling_tools/parse_metrics.py @@ -72,13 +72,41 @@ def get_unit_multiplier(unit_str): return multipliers.get(u, 1.0) def read_csv_with_units(path): - """Read CSV with optional unit row""" + """Read CSV with optional unit row, handling nvprof multi-line format""" rows = [] if not os.path.exists(path): return rows with open(path, 'r', newline='', encoding='utf-8', errors='ignore') as f: - lines = f.readlines() + content = f.read() + + if not content.strip(): + return rows + + # Clean nvprof output: merge lines that are continuations + # nvprof sometimes splits long kernel names across lines + lines = [] + buffer = "" + for line in content.split('\n'): + stripped = line.strip() + # Skip nvprof header lines + if stripped.startswith('==') or not stripped: + continue + + # Count quotes to detect incomplete lines + buffer += line + quote_count = buffer.count('"') + + if quote_count % 2 == 0: + # Complete line + lines.append(buffer.strip()) + buffer = "" + else: + # Incomplete line, continue buffering + buffer += " " + + if buffer.strip(): + lines.append(buffer.strip()) if not lines: return rows @@ -86,15 +114,16 @@ def read_csv_with_units(path): # Find header line header_idx = -1 for i, line in enumerate(lines[:20]): - if '"Name"' in line or 'Name' in line or 'Kernel' in line: + if '"Type"' in line or '"Name"' in line or 'Name' in line or 'Kernel' in line: header_idx = i break if header_idx == -1: return rows - # Parse header - keys = [k.strip().replace('"', '') for k in lines[header_idx].strip().split(',')] + # Parse header - handle quoted CSV properly + header_line = lines[header_idx] + keys = parse_csv_line(header_line) # Check for unit row unit_map = {} @@ -102,9 +131,9 @@ def read_csv_with_units(path): if len(lines) > header_idx + 1: next_line = lines[header_idx + 1] - potential_units = [u.strip().replace('"', '') for u in next_line.strip().split(',')] + potential_units = parse_csv_line(next_line) - # Check if this looks like a unit row + # Check if this looks like a unit row (contains time units or %) if any(u in ['s', 'ms', 'us', 'ns', '%'] for u in potential_units): data_start_idx = header_idx + 2 for i, u in enumerate(potential_units): @@ -112,33 +141,63 @@ def read_csv_with_units(path): unit_map[keys[i]] = get_unit_multiplier(u) # Read data rows - reader = csv.DictReader(lines[data_start_idx:], fieldnames=keys) - for row in reader: - clean_row = {} - for k, v in row.items(): - if not v: - clean_row[k] = v - continue - - # Apply unit conversion - if k in unit_map and unit_map[k] != 1.0: - try: - val_clean = re.sub(r'[^0-9\.]', '', v) - clean_row[k] = float(val_clean) * unit_map[k] - except: - clean_row[k] = v - else: - clean_row[k] = v + for line in lines[data_start_idx:]: + if not line.strip(): + continue + + values = parse_csv_line(line) + row = {} + + for i, val in enumerate(values): + if i < len(keys): + key = keys[i] + # Apply unit conversion + if key in unit_map and unit_map[key] != 1.0: + try: + val_clean = re.sub(r'[^0-9\.\-eE]', '', val) + if val_clean: + row[key] = float(val_clean) * unit_map[key] + else: + row[key] = val + except: + row[key] = val + else: + row[key] = val - rows.append(clean_row) + if row: + rows.append(row) return rows +def parse_csv_line(line): + """Parse a CSV line handling quoted fields""" + fields = [] + current = "" + in_quotes = False + + for char in line: + if char == '"': + in_quotes = not in_quotes + elif char == ',' and not in_quotes: + fields.append(current.strip().replace('"', '')) + current = "" + else: + current += char + + fields.append(current.strip().replace('"', '')) + return fields + def extract_kernel_name(full_name, kernel_filter=None): """Extract kernel name from full mangled name""" if not full_name: return None + # Skip memory operations and API calls + skip_patterns = ['[CUDA memcpy', '[CUDA memset', 'cudaLaunch', 'cudaMalloc', 'cudaFree'] + for skip in skip_patterns: + if skip in full_name: + return None + # If filter provided, check if any filter matches if kernel_filter: for kf in kernel_filter: @@ -146,18 +205,21 @@ def extract_kernel_name(full_name, kernel_filter=None): return kf return None # No match, skip this kernel - # Otherwise, extract base kernel name - # Try to get meaningful name from mangled C++ names - patterns = [ - r'void\s+(\w+)', # void kernel_name<...> - r'(\w+)(?:<|::)', # kernel_name<...> or kernel_name::... - r'^([a-zA-Z_]\w+)', # Simple name at start - ] + # Extract kernel name from mangled C++ name + # Pattern 1: "kernel_name(args)" or "kernel_name