|
| 1 | +//===- c_api.h - SparseFlow Stable C API --------------------------------===// |
| 2 | +// |
| 3 | +// Stable ABI v1 - Binary compatible across minor versions |
| 4 | +// Breaking changes only on major version bump |
| 5 | +// |
| 6 | +//===----------------------------------------------------------------------===// |
| 7 | + |
| 8 | +#ifndef SPARSEFLOW_C_API_H |
| 9 | +#define SPARSEFLOW_C_API_H |
| 10 | + |
| 11 | +#include <stddef.h> |
| 12 | +#include <stdint.h> |
| 13 | + |
| 14 | +#ifdef __cplusplus |
| 15 | +extern "C" { |
| 16 | +#endif |
| 17 | + |
// ABI version triple for this header.
// Increment the version on breaking changes.
#define SPARSEFLOW_ABI_VERSION_MAJOR  1
#define SPARSEFLOW_ABI_VERSION_MINOR  0
#define SPARSEFLOW_ABI_VERSION_PATCH  0
| 22 | + |
// Export macro: SPARSEFLOW_API decorates every public entry point.
//
// Resolution order:
//   1. A definition supplied by the build system is left untouched.
//   2. SPARSEFLOW_STATIC defined: static library, no decoration. Without
//      this opt-out, Windows consumers of a static build would get
//      dllimport and fail to link.
//   3. Windows: dllexport while building the DLL (SPARSEFLOW_BUILD_SHARED),
//      dllimport for consumers.
//   4. GCC/Clang elsewhere: default symbol visibility.
//   5. Any other compiler: no decoration, because __attribute__ is a GNU
//      extension and would not compile there.
#ifndef SPARSEFLOW_API
  #if defined(SPARSEFLOW_STATIC)
    #define SPARSEFLOW_API
  #elif defined(_WIN32)
    #ifdef SPARSEFLOW_BUILD_SHARED
      #define SPARSEFLOW_API __declspec(dllexport)
    #else
      #define SPARSEFLOW_API __declspec(dllimport)
    #endif
  #elif defined(__GNUC__) || defined(__clang__)
    #define SPARSEFLOW_API __attribute__((visibility("default")))
  #else
    #define SPARSEFLOW_API
  #endif
#endif
| 33 | + |
| 34 | +//===----------------------------------------------------------------------===// |
| 35 | +// Opaque handles (hide implementation details) |
| 36 | +//===----------------------------------------------------------------------===// |
| 37 | + |
// Opaque handle types. Each is a pointer to a struct that this header only
// forward-declares, so callers cannot depend on the struct's layout.
typedef struct SparseFlowContext_ *SparseFlowContext;
typedef struct SparseFlowKernel_  *SparseFlowKernel;
typedef struct SparseFlowTensor_  *SparseFlowTensor;
| 41 | + |
| 42 | +//===----------------------------------------------------------------------===// |
| 43 | +// Error handling |
| 44 | +//===----------------------------------------------------------------------===// |
| 45 | + |
// Status codes returned by the API functions declared in this header.
// SPARSEFLOW_SUCCESS is zero; every error code is a distinct positive value.
// Existing values must never be renumbered: they are part of the ABI.
typedef enum {
    SPARSEFLOW_SUCCESS                  = 0,
    SPARSEFLOW_ERROR_INVALID_ARGUMENT   = 1,
    SPARSEFLOW_ERROR_OUT_OF_MEMORY      = 2,
    SPARSEFLOW_ERROR_CUDA_ERROR         = 3,
    SPARSEFLOW_ERROR_NOT_SUPPORTED      = 4,
    SPARSEFLOW_ERROR_INVALID_HANDLE     = 5,
    SPARSEFLOW_ERROR_COMPILATION_FAILED = 6,
    SPARSEFLOW_ERROR_ABI_MISMATCH       = 7,
    // Not a real status. The storage size of an enum is implementation-
    // defined (e.g. -fshort-enums shrinks it to one byte); this sentinel
    // pins the type to at least 32 bits so the ABI does not depend on
    // compiler flags.
    SPARSEFLOW_STATUS_FORCE_INT32       = 0x7FFFFFFF
} SparseFlowStatus;
| 56 | + |
| 57 | +// Get error message for status code |
| 58 | +SPARSEFLOW_API const char* sparseflow_get_error_string(SparseFlowStatus status); |
| 59 | + |
| 60 | +// Get last error message (thread-local) |
| 61 | +SPARSEFLOW_API const char* sparseflow_get_last_error(); |
| 62 | + |
| 63 | +//===----------------------------------------------------------------------===// |
| 64 | +// Version info |
| 65 | +//===----------------------------------------------------------------------===// |
| 66 | + |
// Version triple as reported by sparseflow_get_version().
// Field order is part of the ABI; do not reorder.
typedef struct {
    int major;  // Major version component
    int minor;  // Minor version component
    int patch;  // Patch version component
} SparseFlowVersion;
| 72 | + |
| 73 | +SPARSEFLOW_API SparseFlowVersion sparseflow_get_version(); |
| 74 | + |
| 75 | +// Check ABI compatibility |
| 76 | +SPARSEFLOW_API int sparseflow_is_abi_compatible(int major, int minor); |
| 77 | + |
| 78 | +//===----------------------------------------------------------------------===// |
| 79 | +// Context management |
| 80 | +//===----------------------------------------------------------------------===// |
| 81 | + |
| 82 | +// Create context for specific GPU |
| 83 | +SPARSEFLOW_API SparseFlowStatus sparseflow_create_context( |
| 84 | + SparseFlowContext* ctx, |
| 85 | + int device_id |
| 86 | +); |
| 87 | + |
| 88 | +// Destroy context |
| 89 | +SPARSEFLOW_API SparseFlowStatus sparseflow_destroy_context( |
| 90 | + SparseFlowContext ctx |
| 91 | +); |
| 92 | + |
| 93 | +// Get GPU info from context |
| 94 | +SPARSEFLOW_API SparseFlowStatus sparseflow_get_device_info( |
| 95 | + SparseFlowContext ctx, |
| 96 | + int* compute_capability, |
| 97 | + int* sm_count, |
| 98 | + size_t* total_memory |
| 99 | +); |
| 100 | + |
| 101 | +//===----------------------------------------------------------------------===// |
| 102 | +// Epilogue configuration |
| 103 | +//===----------------------------------------------------------------------===// |
| 104 | + |
// Epilogue kinds selectable through SparseFlowEpilogueConfig.
// Existing values must never be renumbered: they are part of the ABI.
typedef enum {
    SPARSEFLOW_EPILOGUE_NONE        = 0,
    SPARSEFLOW_EPILOGUE_RELU        = 1,
    SPARSEFLOW_EPILOGUE_SILU        = 2,
    SPARSEFLOW_EPILOGUE_GELU        = 3,
    SPARSEFLOW_EPILOGUE_BIAS        = 4,
    SPARSEFLOW_EPILOGUE_BIAS_RELU   = 5,
    SPARSEFLOW_EPILOGUE_BIAS_SILU   = 6,
    // Not a real epilogue. This enum is embedded in a public struct, so its
    // storage size affects struct layout; the sentinel pins it to at least
    // 32 bits regardless of compiler flags such as -fshort-enums.
    SPARSEFLOW_EPILOGUE_FORCE_INT32 = 0x7FFFFFFF
} SparseFlowEpilogue;
| 114 | + |
| 115 | +typedef struct { |
| 116 | + SparseFlowEpilogue kind; |
| 117 | + const void* params; // Optional parameters (e.g., bias pointer) |
| 118 | + size_t params_size; // Size of params in bytes |
| 119 | +} SparseFlowEpilogueConfig; |
| 120 | + |
| 121 | +//===----------------------------------------------------------------------===// |
| 122 | +// Kernel compilation |
| 123 | +//===----------------------------------------------------------------------===// |
| 124 | + |
| 125 | +typedef struct { |
| 126 | + int tile_m; // Tile size M (0 = auto-select) |
| 127 | + int tile_n; // Tile size N (0 = auto-select) |
| 128 | + int tile_k; // Tile size K (0 = auto-select) |
| 129 | + SparseFlowEpilogueConfig epilogue; |
| 130 | +} SparseFlowKernelConfig; |
| 131 | + |
| 132 | +// Compile kernel with given configuration |
| 133 | +SPARSEFLOW_API SparseFlowStatus sparseflow_compile_kernel( |
| 134 | + SparseFlowContext ctx, |
| 135 | + SparseFlowKernel* kernel, |
| 136 | + const SparseFlowKernelConfig* config |
| 137 | +); |
| 138 | + |
| 139 | +// Destroy kernel |
| 140 | +SPARSEFLOW_API SparseFlowStatus sparseflow_destroy_kernel( |
| 141 | + SparseFlowKernel kernel |
| 142 | +); |
| 143 | + |
| 144 | +//===----------------------------------------------------------------------===// |
| 145 | +// Tensor operations |
| 146 | +//===----------------------------------------------------------------------===// |
| 147 | + |
// Element data types accepted by the tensor operations in this header.
// Existing values must never be renumbered: they are part of the ABI.
typedef enum {
    SPARSEFLOW_DTYPE_FP16        = 0,
    SPARSEFLOW_DTYPE_FP32        = 1,
    SPARSEFLOW_DTYPE_INT8        = 2,
    // Not a real data type. This enum is passed by value across the ABI;
    // the sentinel pins its storage to at least 32 bits regardless of
    // compiler flags such as -fshort-enums.
    SPARSEFLOW_DTYPE_FORCE_INT32 = 0x7FFFFFFF
} SparseFlowDataType;
| 153 | + |
| 154 | +// Execute sparse GEMM: C = A @ Bc |
| 155 | +SPARSEFLOW_API SparseFlowStatus sparseflow_sparse_gemm( |
| 156 | + SparseFlowKernel kernel, |
| 157 | + const void* A, // Dense matrix (M × K) |
| 158 | + const void* Bc, // Compressed sparse matrix |
| 159 | + void* C, // Output matrix (M × N) |
| 160 | + int M, int N, int K, |
| 161 | + SparseFlowDataType dtype, |
| 162 | + void* stream // CUDA stream (or NULL for default) |
| 163 | +); |
| 164 | + |
| 165 | +// Compress dense tensor to 2:4 format |
| 166 | +SPARSEFLOW_API SparseFlowStatus sparseflow_compress_2_4( |
| 167 | + SparseFlowContext ctx, |
| 168 | + const void* dense, // Input dense tensor |
| 169 | + void* compressed, // Output compressed tensor (50% size) |
| 170 | + void* metadata, // Output metadata |
| 171 | + int M, int N, |
| 172 | + SparseFlowDataType dtype |
| 173 | +); |
| 174 | + |
| 175 | +// Validate 2:4 sparsity pattern |
| 176 | +SPARSEFLOW_API SparseFlowStatus sparseflow_validate_2_4( |
| 177 | + const void* tensor, |
| 178 | + int M, int N, |
| 179 | + SparseFlowDataType dtype, |
| 180 | + int* is_valid // Output: 1 if valid, 0 if not |
| 181 | +); |
| 182 | + |
| 183 | +//===----------------------------------------------------------------------===// |
| 184 | +// Benchmarking utilities |
| 185 | +//===----------------------------------------------------------------------===// |
| 186 | + |
// Measurements filled in by sparseflow_benchmark_kernel().
// Field order is part of the ABI; do not reorder.
typedef struct {
    double elapsed_ms;        // Elapsed time in milliseconds
    double tflops_effective;  // Effective TFLOPS
    double tflops_real;       // Real TFLOPS (accounting for sparsity)
    double bandwidth_gb_s;    // Memory bandwidth (GB/s)
} SparseFlowBenchmarkResult;
| 193 | + |
| 194 | +// Benchmark kernel performance |
| 195 | +SPARSEFLOW_API SparseFlowStatus sparseflow_benchmark_kernel( |
| 196 | + SparseFlowKernel kernel, |
| 197 | + int M, int N, int K, |
| 198 | + int num_iterations, |
| 199 | + SparseFlowBenchmarkResult* result |
| 200 | +); |
| 201 | + |
| 202 | +#ifdef __cplusplus |
| 203 | +} |
| 204 | +#endif |
| 205 | + |
| 206 | +#endif // SPARSEFLOW_C_API_H |
0 commit comments