Support for custom structs #1513
Replies: 1 comment 1 reply
-
|
@desplenterkarel thanks for asking this question. As of now, we don't have an elegant way of passing struct-like Python objects through to kernel launches, but you can pass a device buffer containing an array of structs. Here's an AI-generated example of using NumPy structured arrays with `cuda.core`:
import numpy as np
from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch
from cuda.bindings import driver
# CUDA C++ source for the demo kernel; main() compiles it at runtime.
# The kernel updates an array of `Record` structs in place, one thread per record.
# NOTE: Field order and sizes must match the NumPy structured dtype exactly
# (main() builds that dtype with align=True so its padding follows C struct
# alignment rules).
code = """
struct Record {
int id; // 4 bytes (int32)
long long count; // 8 bytes (int64)
float value; // 4 bytes (float32)
double weight; // 8 bytes (float64)
};
extern "C"
__global__ void process_records(Record* records, int n, double scale) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < n) {
Record* r = &records[tid];
r->count += r->id;
r->value *= (float)scale;
r->weight *= scale;
}
}
"""
def _check_driver(result):
    """Unpack a ``cuda.bindings.driver`` call result, raising on failure.

    The low-level driver bindings report errors through the first element of
    the returned tuple rather than by raising, so an unchecked call can fail
    silently.

    Returns:
        The remaining elements of the result tuple (possibly empty).

    Raises:
        RuntimeError: if the status is not ``CUDA_SUCCESS``.
    """
    status, *values = result
    if status != driver.CUresult.CUDA_SUCCESS:
        raise RuntimeError(f"CUDA driver call failed: {status!r}")
    return values


def main():
    """Run the array-of-structs demo end to end.

    Builds a NumPy structured array whose layout mirrors the CUDA ``Record``
    struct, copies it to the device, runs ``process_records`` on it, copies
    the result back and verifies it against a host-side computation.

    Both copies are issued on ``stream`` so that they are ordered with the
    stream-ordered allocation and the kernel launch, independent of how the
    stream relates to the default stream. Device resources are released even
    if a step fails.

    Raises:
        RuntimeError: if a driver-level memory copy reports an error.
        AssertionError: if the device results do not match the host reference.
    """
    # Initialize device
    dev = Device()
    dev.set_current()
    stream = dev.create_stream()
    try:
        # Define structured dtype matching the CUDA struct layout
        record_dtype = np.dtype([
            ('id', np.int32),        # 4 bytes
            ('count', np.int64),     # 8 bytes (4 bytes padding before)
            ('value', np.float32),   # 4 bytes
            ('weight', np.float64),  # 8 bytes (4 bytes padding before)
        ], align=True)  # align=True ensures C-compatible struct alignment
        print(f"Structured dtype: {record_dtype}")
        print(f"Total size per record: {record_dtype.itemsize} bytes")

        # Create and initialize structured array on host (NumPy)
        n_records = 1000
        rng = np.random.default_rng(42)
        records_host = np.zeros(n_records, dtype=record_dtype)
        records_host['id'] = np.arange(n_records, dtype=np.int32)
        records_host['count'] = np.zeros(n_records, dtype=np.int64)
        records_host['value'] = rng.uniform(0, 100, n_records).astype(np.float32)
        records_host['weight'] = rng.uniform(0, 1, n_records).astype(np.float64)

        # Store original values for verification
        original_count = records_host['count'].copy()
        original_value = records_host['value'].copy()
        original_weight = records_host['weight'].copy()

        # Compile kernel
        prog = Program(code, code_type="c++", options=ProgramOptions(arch=f"sm_{dev.arch}"))
        mod = prog.compile("cubin")
        kernel = mod.get_kernel("process_records")

        # Allocate device memory and copy data
        array_size = records_host.nbytes
        records_buf = dev.allocate(array_size, stream=stream)
        try:
            # Copy on the same stream as the allocation and the launch so the
            # three operations are ordered with respect to each other.
            _check_driver(driver.cuMemcpyHtoDAsync(
                records_buf.handle, records_host, array_size, stream.handle))

            # Launch kernel - pass Buffer, its pointer is extracted automatically
            scale = np.float64(2.5)
            threads_per_block = 256
            # Round up so the last, partially filled block is still launched.
            n_blocks = (n_records + threads_per_block - 1) // threads_per_block
            config = LaunchConfig(grid=n_blocks, block=threads_per_block)
            launch(stream, config, kernel, records_buf, np.int32(n_records), scale)

            # Copy back to host for verification
            records_result = np.zeros_like(records_host)
            _check_driver(driver.cuMemcpyDtoHAsync(
                records_result, records_buf.handle, array_size, stream.handle))
            # Wait for the kernel and the copy before reading the host array.
            stream.sync()

            # Verify results
            expected_count = original_count + records_host['id']
            expected_value = original_value * scale
            expected_weight = original_weight * scale
            assert np.allclose(records_result['count'], expected_count), "count mismatch"
            assert np.allclose(records_result['value'], expected_value), "value mismatch"
            assert np.allclose(records_result['weight'], expected_weight), "weight mismatch"

            # Print sample results
            print(f"\nProcessed {n_records} records with scale={scale}")
            print("\nSample record [0]:")
            print(f" id={records_result['id'][0]}, count={records_result['count'][0]}, "
                  f"value={records_result['value'][0]:.2f}, weight={records_result['weight'][0]:.4f}")
        finally:
            records_buf.close(stream)
    finally:
        stream.close()
    print("\nSuccess!")
if __name__ == "__main__":
    main()

In general, you can pass a `Buffer` (or a raw device pointer) as the kernel argument for an array of structs. Does this help for your use case? |
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
I see that Warp provides built-in structs like quaternion, and also supports defining custom structs (see: warp#structs).
My question is: are there plans to support user-defined custom structs, especially for passing them from Python into kernels?
Right now, my assumption is that this would have to work similarly to how CuPy handles custom user types (see: cupy#custom-user-types).
For example, if I want something like a float3* float3_array, I could do:
However, this approach effectively limits me to “structs” where all fields have the same type and layout (e.g., only floats).
It doesn’t seem to support heterogeneous structs (e.g., mixing floats, ints, etc.).
Am I missing an existing mechanism for defining and passing custom structured data, or is this currently not supported / planned?
Any insight would be appreciated. Thanks!
Beta Was this translation helpful? Give feedback.
All reactions