From f56203026207794ba85c37d5e8751896eb1d0c56 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 15 Aug 2025 14:01:57 +0300 Subject: [PATCH 01/27] fix window displays --- Lorr/Editor/main.cc | 11 ++++++++--- Lorr/Engine/Window/Window.cc | 2 ++ Lorr/Runtime/main.cc | 9 ++++++++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Lorr/Editor/main.cc b/Lorr/Editor/main.cc index 682e7f66..6b2c9d43 100755 --- a/Lorr/Editor/main.cc +++ b/Lorr/Editor/main.cc @@ -9,12 +9,17 @@ i32 main(i32, c8 **) { lr::Window::init_sdl(); auto primary_display = lr::Window::display_at(0).value(); + auto window_info = lr::WindowInfo{ + .title = "Lorr Editor", + .display = &primary_display, + .width = 1720, + .height = 880, + .flags = lr::WindowFlag::Centered | lr::WindowFlag::Resizable, + }; lr::AppBuilder() // .module(3) - .module( - lr::WindowInfo{ .title = "Lorr Editor", .width = 1720, .height = 880, .flags = lr::WindowFlag::Centered | lr::WindowFlag::Resizable } - ) + .module(window_info) .module() .module() .module() diff --git a/Lorr/Engine/Window/Window.cc b/Lorr/Engine/Window/Window.cc index 23da7148..71952a23 100644 --- a/Lorr/Engine/Window/Window.cc +++ b/Lorr/Engine/Window/Window.cc @@ -93,6 +93,8 @@ auto Window::init(this Window &self) -> bool { } if (self.flags & WindowFlag::Fullscreen) { + self.width = self.display->resolution.x; + self.height = self.display->resolution.y; window_flags |= SDL_WINDOW_FULLSCREEN; } diff --git a/Lorr/Runtime/main.cc b/Lorr/Runtime/main.cc index fe031601..cfc19ab1 100755 --- a/Lorr/Runtime/main.cc +++ b/Lorr/Runtime/main.cc @@ -18,10 +18,17 @@ i32 main(i32 argc, c8 **argv) { lr::Window::init_sdl(); auto primary_display = lr::Window::display_at(0).value(); + auto window_info = lr::WindowInfo{ + .title = "Example Game", + .display = &primary_display, + .width = 1720, + .height = 880, + .flags = lr::WindowFlag::Fullscreen, + }; lr::AppBuilder() // .module(3) - .module(lr::WindowInfo{ .title = 
"Example Game", .width = 1720, .height = 880, .flags = lr::WindowFlag::Centered }) + .module(window_info) .module() .module() .module() From 5310acb01a4d4dce72c0a656d7bfacde181d7d00 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 15 Aug 2025 20:12:10 +0300 Subject: [PATCH 02/27] add atmos refreshing --- .../Resources/shaders/passes/brdf.slang | 2 +- .../shaders/passes/editor_grid.slang | 23 +- .../shaders/passes/sky_transmittance.slang | 2 +- .../Resources/shaders/passes/tonemap.slang | 412 ++++++++++++++- Lorr/Engine/Resources/shaders/sky.slang | 8 +- Lorr/Engine/Scene/GPUScene.hh | 6 +- Lorr/Engine/Scene/Scene.cc | 31 +- Lorr/Engine/Scene/Scene.hh | 2 + Lorr/Engine/Scene/SceneRenderer.cc | 475 +++++++++--------- Lorr/Engine/Scene/SceneRenderer.hh | 3 + 10 files changed, 708 insertions(+), 256 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang b/Lorr/Engine/Resources/shaders/passes/brdf.slang index 82ffe8d4..6015e53d 100644 --- a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ b/Lorr/Engine/Resources/shaders/passes/brdf.slang @@ -58,7 +58,7 @@ func fs_main(VertexOutput input) -> f32x4 { var sky_luminance = f32x3(1.0); if (params.environment.flags & (EnvironmentFlags::HasSun | EnvironmentFlags::HasAtmosphere)) { // SUN LIGHT COLOR ────────────────────────────────────────────────── - var eye_altitude = world_position.y * CAMERA_SCALE_UNIT; + var eye_altitude = max(world_position.y, 0.0) * CAMERA_SCALE_UNIT; eye_altitude += params.environment.atmos_planet_radius + PLANET_RADIUS_OFFSET; var eye_pos = f32x3(0.0, eye_altitude, 0.0); let up_vec = f32x3(0.0, 1.0, 0.0); diff --git a/Lorr/Engine/Resources/shaders/passes/editor_grid.slang b/Lorr/Engine/Resources/shaders/passes/editor_grid.slang index a302c44f..a9a11105 100644 --- a/Lorr/Engine/Resources/shaders/passes/editor_grid.slang +++ b/Lorr/Engine/Resources/shaders/passes/editor_grid.slang @@ -3,6 +3,11 @@ module editor_grid; import std; import scene; 
+struct ShaderParameters { + ConstantBuffer camera; +}; +uniform ParameterBlock params; + struct VertexOutput { f32x4 position : SV_Position; f32x3 near_pos; @@ -14,18 +19,13 @@ func unproject_point(f32x3 position, in mat4 inv_proj_view_mat) -> f32x3 { return p.xyz / p.w; } -struct PushConstants { - Camera *camera; -}; -[[vk::push_constant]] PushConstants C; - [[shader("vertex")]] VertexOutput vs_main(u32 vertex_id : SV_VertexID) { const let uv = f32x2((vertex_id << 1) & 2, vertex_id & 2); VertexOutput output; output.position = f32x4(uv * 2.0 - 1.0, 0.5, 1.0); - output.far_pos = unproject_point(f32x3(output.position.xy, 1.0), C.camera->inv_projection_view_mat); - output.near_pos = unproject_point(f32x3(output.position.xy, 0.0), C.camera->inv_projection_view_mat); + output.far_pos = unproject_point(f32x3(output.position.xy, 1.0), params.camera.inv_projection_view_mat); + output.near_pos = unproject_point(f32x3(output.position.xy, 0.0), params.camera.inv_projection_view_mat); return output; } @@ -70,16 +70,17 @@ struct FragmentOutput { FragmentOutput fs_main(VertexOutput input) { FragmentOutput output; float t = -input.near_pos.y / (input.far_pos.y - input.near_pos.y); - if (t < 0.0) - discard; + if (t != 0.0) { + //discard; + } float3 pixel_pos = input.near_pos + t * (input.far_pos - input.near_pos); pixel_pos.y -= 0.1; - float4 clip_space_pos = mul(C.camera->projection_view_mat, float4(pixel_pos, 1.0)); + float4 clip_space_pos = mul(params.camera.projection_view_mat, float4(pixel_pos, 1.0)); float depth = clip_space_pos.z / clip_space_pos.w; - output.color = pristine_grid(pixel_pos, float2(0.005)) * float(t > 0.0); + output.color = pristine_grid(pixel_pos, float2(0.005)); output.depth = depth; return output; } diff --git a/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang b/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang index 4781835a..15ad0765 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang +++ 
b/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang @@ -31,7 +31,7 @@ func cs_main( f32x3 sun_dir = f32x3(0.0, sqrt(1.0 - lut_y * lut_y), lut_y); f32x3 ray_pos = f32x3(0.0, 0.0, lut_x); - const f32 STEP_COUNT = 1000.0; + const f32 STEP_COUNT = 420.0; f32 distance = std::ray_sphere_intersect_nearest(ray_pos, sun_dir, params.environment.atmos_atmos_radius).value; f32 distance_per_step = distance / STEP_COUNT; f32x3 optical_depth = 0.0; diff --git a/Lorr/Engine/Resources/shaders/passes/tonemap.slang b/Lorr/Engine/Resources/shaders/passes/tonemap.slang index 49f6afe7..ebd01f22 100644 --- a/Lorr/Engine/Resources/shaders/passes/tonemap.slang +++ b/Lorr/Engine/Resources/shaders/passes/tonemap.slang @@ -146,6 +146,410 @@ float3 agx_tonemapping(float3 color) { return color; } +// ----------------------------------------------------------------------------- +// Mode options. +// ----------------------------------------------------------------------------- +#define TONE_MAPPING_UCS_ICTCP 0 +#define TONE_MAPPING_UCS_JZAZBZ 1 +#define TONE_MAPPING_UCS TONE_MAPPING_UCS_ICTCP + +// ----------------------------------------------------------------------------- +// Defines the SDR reference white level used in our tone mapping (typically 250 nits). +// ----------------------------------------------------------------------------- +#define GRAN_TURISMO_SDR_PAPER_WHITE 250.0f // cd/m^2 + +// ----------------------------------------------------------------------------- +// Gran Turismo luminance-scale conversion helpers. +// In Gran Turismo, 1.0f in the linear frame-buffer space corresponds to +// REFERENCE_LUMINANCE cd/m^2 of physical luminance (typically 100 cd/m^2). 
+// ----------------------------------------------------------------------------- +#define REFERENCE_LUMINANCE 100.0f // cd/m^2 <-> 1.0f + +float +frameBufferValueToPhysicalValue(float fbValue) +{ + // Converts linear frame-buffer value to physical luminance (cd/m^2) + // where 1.0 corresponds to REFERENCE_LUMINANCE (e.g., 100 cd/m^2). + return fbValue * REFERENCE_LUMINANCE; +} + +float +physicalValueToFrameBufferValue(float physical) +{ + // Converts physical luminance (cd/m^2) to a linear frame-buffer value, + // where 1.0 corresponds to REFERENCE_LUMINANCE (e.g., 100 cd/m^2). + return physical / REFERENCE_LUMINANCE; +} + +// ----------------------------------------------------------------------------- +// Utility functions. +// ----------------------------------------------------------------------------- +float +smoothStep(float x, float edge0, float edge1) +{ + float t = (x - edge0) / (edge1 - edge0); + + if (x < edge0) + { + return 0.0f; + } + if (x > edge1) + { + return 1.0f; + } + + return t * t * (3.0f - 2.0f * t); +} + +float +chromaCurve(float x, float a, float b) +{ + return 1.0f - smoothStep(x, a, b); +} + + +// ----------------------------------------------------------------------------- +// "GT Tone Mapping" curve with convergent shoulder. +// ----------------------------------------------------------------------------- +struct GTToneMappingCurveV2 +{ + float peakIntensity_; + float alpha_; + float midPoint_; + float linearSection_; + float toeStrength_; + float kA_, kB_, kC_; + + [mutating] + void initializeCurve(float monitorIntensity, + float alpha, + float grayPoint, + float linearSection, + float toeStrength) + { + peakIntensity_ = monitorIntensity; + alpha_ = alpha; + midPoint_ = grayPoint; + linearSection_ = linearSection; + toeStrength_ = toeStrength; + + // Pre-compute constants for the shoulder region. 
+ float k = (linearSection_ - 1.0f) / (alpha_ - 1.0f); + kA_ = peakIntensity_ * linearSection_ + peakIntensity_ * k; + kB_ = -peakIntensity_ * k * exp(linearSection_ / k); + kC_ = -1.0f / (k * peakIntensity_); + } + + float evaluateCurve(float x) + { + if (x < 0.0f) + { + return 0.0f; + } + + float weightLinear = smoothStep(x, 0.0f, midPoint_); + float weightToe = 1.0f - weightLinear; + + // Shoulder mapping for highlights. + float shoulder = kA_ + kB_ * exp(x * kC_); + + if (x < linearSection_ * peakIntensity_) + { + float toeMapped = midPoint_ * pow(x / midPoint_, toeStrength_); + return weightToe * toeMapped + weightLinear * x; + } + else + { + return shoulder; + } + } +}; + +// ----------------------------------------------------------------------------- +// EOTF / inverse-EOTF for ST-2084 (PQ). +// Note: Introduce exponentScaleFactor to allow scaling of the exponent in the EOTF for Jzazbz. +// ----------------------------------------------------------------------------- +float +eotfSt2084(float n, float exponentScaleFactor = 1.0f) +{ + if (n < 0.0f) + { + n = 0.0f; + } + if (n > 1.0f) + { + n = 1.0f; + } + + // Base functions from SMPTE ST 2084:2014 + // Converts from normalized PQ (0-1) to absolute luminance in cd/m^2 (linear light) + // Assumes float input; does not handle integer encoding (Annex) + // Assumes full-range signal (0-1) + const float m1 = 0.1593017578125f; // (2610 / 4096) / 4 + const float m2 = 78.84375f * exponentScaleFactor; // (2523 / 4096) * 128 + const float c1 = 0.8359375f; // 3424 / 4096 + const float c2 = 18.8515625f; // (2413 / 4096) * 32 + const float c3 = 18.6875f; // (2392 / 4096) * 32 + const float pqC = 10000.0f; // Maximum luminance supported by PQ (cd/m^2) + + // Does not handle signal range from 2084 - assumes full range (0-1) + float np = pow(n, 1.0f / m2); + float l = np - c1; + + if (l < 0.0f) + { + l = 0.0f; + } + + l = l / (c2 - c3 * np); + l = pow(l, 1.0f / m1); + + // Convert absolute luminance (cd/m^2) into the 
frame-buffer linear scale. + return physicalValueToFrameBufferValue(l * pqC); +} + +float +inverseEotfSt2084(float v, float exponentScaleFactor = 1.0f) +{ + const float m1 = 0.1593017578125f; + const float m2 = 78.84375f * exponentScaleFactor; + const float c1 = 0.8359375f; + const float c2 = 18.8515625f; + const float c3 = 18.6875f; + const float pqC = 10000.0f; + + // Convert the frame-buffer linear scale into absolute luminance (cd/m^2). + float physical = frameBufferValueToPhysicalValue(v); + float y = physical / pqC; // Normalize for the ST-2084 curve + + float ym = pow(y, m1); + return exp2(m2 * (log2(c1 + c2 * ym) - log2(1.0f + c3 * ym))); +} + +// ----------------------------------------------------------------------------- +// ICtCp conversion. +// Reference: ITU-T T.302 (https://www.itu.int/rec/T-REC-T.302/en) +// ----------------------------------------------------------------------------- +void +rgbToICtCp(f32x3 rgb, out f32x3 ictCp) // Input: linear Rec.2020 +{ + float l = (rgb[0] * 1688.0f + rgb[1] * 2146.0f + rgb[2] * 262.0f) / 4096.0f; + float m = (rgb[0] * 683.0f + rgb[1] * 2951.0f + rgb[2] * 462.0f) / 4096.0f; + float s = (rgb[0] * 99.0f + rgb[1] * 309.0f + rgb[2] * 3688.0f) / 4096.0f; + + float lPQ = inverseEotfSt2084(l); + float mPQ = inverseEotfSt2084(m); + float sPQ = inverseEotfSt2084(s); + + ictCp[0] = (2048.0f * lPQ + 2048.0f * mPQ) / 4096.0f; + ictCp[1] = (6610.0f * lPQ - 13613.0f * mPQ + 7003.0f * sPQ) / 4096.0f; + ictCp[2] = (17933.0f * lPQ - 17390.0f * mPQ - 543.0f * sPQ) / 4096.0f; +} + +void +iCtCpToRgb(f32x3 ictCp, out f32x3 rgb) // Output: linear Rec.2020 +{ + float l = ictCp[0] + 0.00860904f * ictCp[1] + 0.11103f * ictCp[2]; + float m = ictCp[0] - 0.00860904f * ictCp[1] - 0.11103f * ictCp[2]; + float s = ictCp[0] + 0.560031f * ictCp[1] - 0.320627f * ictCp[2]; + + float lLin = eotfSt2084(l); + float mLin = eotfSt2084(m); + float sLin = eotfSt2084(s); + + rgb[0] = max(3.43661f * lLin - 2.50645f * mLin + 0.0698454f * sLin, 0.0f); + 
rgb[1] = max(-0.79133f * lLin + 1.9836f * mLin - 0.192271f * sLin, 0.0f); + rgb[2] = max(-0.0259499f * lLin - 0.0989137f * mLin + 1.12486f * sLin, 0.0f); +} + +// ----------------------------------------------------------------------------- +// Jzazbz conversion. +// Reference: +// Muhammad Safdar, Guihua Cui, Youn Jin Kim, and Ming Ronnier Luo, +// "Perceptually uniform color space for image signals including high dynamic +// range and wide gamut," Opt. Express 25, 15131-15151 (2017) +// Note: Coefficients adjusted for linear Rec.2020 +// ----------------------------------------------------------------------------- +#define JZAZBZ_EXPONENT_SCALE_FACTOR 1.7f // Scale factor for exponent + +void +rgbToJzazbz(f32x3 rgb, out f32x3 jab) // Input: linear Rec.2020 +{ + float l = rgb[0] * 0.530004f + rgb[1] * 0.355704f + rgb[2] * 0.086090f; + float m = rgb[0] * 0.289388f + rgb[1] * 0.525395f + rgb[2] * 0.157481f; + float s = rgb[0] * 0.091098f + rgb[1] * 0.147588f + rgb[2] * 0.734234f; + + float lPQ = inverseEotfSt2084(l, JZAZBZ_EXPONENT_SCALE_FACTOR); + float mPQ = inverseEotfSt2084(m, JZAZBZ_EXPONENT_SCALE_FACTOR); + float sPQ = inverseEotfSt2084(s, JZAZBZ_EXPONENT_SCALE_FACTOR); + + float iz = 0.5f * lPQ + 0.5f * mPQ; + + jab[0] = (0.44f * iz) / (1.0f - 0.56f * iz) - 1.6295499532821566e-11f; + jab[1] = 3.524000f * lPQ - 4.066708f * mPQ + 0.542708f * sPQ; + jab[2] = 0.199076f * lPQ + 1.096799f * mPQ - 1.295875f * sPQ; +} + +void +jzazbzToRgb(f32x3 jab, out f32x3 rgb) // Output: linear Rec.2020 +{ + float jz = jab[0] + 1.6295499532821566e-11f; + float iz = jz / (0.44f + 0.56f * jz); + float a = jab[1]; + float b = jab[2]; + + float l = iz + a * 1.386050432715393e-1f + b * 5.804731615611869e-2f; + float m = iz + a * -1.386050432715393e-1f + b * -5.804731615611869e-2f; + float s = iz + a * -9.601924202631895e-2f + b * -8.118918960560390e-1f; + + float lLin = eotfSt2084(l, JZAZBZ_EXPONENT_SCALE_FACTOR); + float mLin = eotfSt2084(m, JZAZBZ_EXPONENT_SCALE_FACTOR); + float 
sLin = eotfSt2084(s, JZAZBZ_EXPONENT_SCALE_FACTOR); + + rgb[0] = lLin * 2.990669f + mLin * -2.049742f + sLin * 0.088977f; + rgb[1] = lLin * -1.634525f + mLin * 3.145627f + sLin * -0.483037f; + rgb[2] = lLin * -0.042505f + mLin * -0.377983f + sLin * 1.448019f; +} + +// ----------------------------------------------------------------------------- +// Unified color space (UCS): ICtCp or Jzazbz. +// ----------------------------------------------------------------------------- +#if TONE_MAPPING_UCS == TONE_MAPPING_UCS_ICTCP +void +rgbToUcs(f32x3 rgb, out f32x3 ucs) +{ + rgbToICtCp(rgb, ucs); +} +void +ucsToRgb(f32x3 ucs, out f32x3 rgb) +{ + iCtCpToRgb(ucs, rgb); +} +#elif TONE_MAPPING_UCS == TONE_MAPPING_UCS_JZAZBZ +void +rgbToUcs(f32x3 rgb, out f32x3 ucs) +{ + rgbToJzazbz(rgb, ucs); +} +void +ucsToRgb(f32x3 ucs, out f32x3 rgb) +{ + jzazbzToRgb(ucs, rgb); +} +#else +#error "Unsupported TONE_MAPPING_UCS value. Please define TONE_MAPPING_UCS as either TONE_MAPPING_UCS_ICTCP or TONE_MAPPING_UCS_JZAZBZ." +#endif + +// ----------------------------------------------------------------------------- +// GT7 Tone Mapping class. +// ----------------------------------------------------------------------------- +struct GT7ToneMapping +{ + float sdrCorrectionFactor_; + + float framebufferLuminanceTarget_; + float framebufferLuminanceTargetUcs_; // Target luminance in UCS space + GTToneMappingCurveV2 curve_; + + float blendRatio_; + float fadeStart_; + float fadeEnd_; + + // Initializes the tone mapping curve and related parameters based on the target display luminance. + // This method should not be called directly. Use initializeAsHDR() or initializeAsSDR() instead. + [mutating] + void initializeParameters(float physicalTargetLuminance) + { + framebufferLuminanceTarget_ = physicalValueToFrameBufferValue(physicalTargetLuminance); + + // Initialize the curve (slightly different parameters from GT Sport). 
+ curve_.initializeCurve(framebufferLuminanceTarget_, 0.25f, 0.538f, 0.444f, 1.280f); + + // Default parameters. + blendRatio_ = 0.6f; + fadeStart_ = 0.98f; + fadeEnd_ = 1.16f; + + f32x3 ucs; + f32x3 rgb = { framebufferLuminanceTarget_, + framebufferLuminanceTarget_, + framebufferLuminanceTarget_ }; + rgbToUcs(rgb, ucs); + framebufferLuminanceTargetUcs_ = + ucs[0]; // Use the first UCS component (I or Jz) as luminance + } + + // Initialize for HDR (High Dynamic Range) display. + // Input: target display peak luminance in nits (range: 250 to 10,000) + // Note: The lower limit is 250 because the parameters for GTToneMappingCurveV2 + // were determined based on an SDR paper white assumption of 250 nits (GRAN_TURISMO_SDR_PAPER_WHITE). + [mutating] + void initializeAsHDR(float physicalTargetLuminance) + { + sdrCorrectionFactor_ = 1.0f; + initializeParameters(physicalTargetLuminance); + } + + // Initialize for SDR (Standard Dynamic Range) display. + [mutating] + void initializeAsSDR() + { + // Regarding SDR output: + // First, in GT (Gran Turismo), it is assumed that a maximum value of 1.0 in SDR output + // corresponds to GRAN_TURISMO_SDR_PAPER_WHITE (typically 250 nits). + // Therefore, tone mapping for SDR output is performed based on GRAN_TURISMO_SDR_PAPER_WHITE. + // However, in the sRGB standard, 1.0f corresponds to 100 nits, + // so we need to "undo" the tone-mapped values accordingly. + // To match the sRGB range, the tone-mapped values are scaled using sdrCorrectionFactor_. + // + // * These adjustments ensure that the visual appearance (in terms of brightness) + // stays generally consistent across both HDR and SDR outputs for the same rendered content. 
+ sdrCorrectionFactor_ = 1.0f / physicalValueToFrameBufferValue(GRAN_TURISMO_SDR_PAPER_WHITE); + initializeParameters(GRAN_TURISMO_SDR_PAPER_WHITE); + } + + // Input: linear Rec.2020 RGB (frame buffer values) + // Output: tone-mapped RGB (frame buffer values); + // - in SDR mode: mapped to [0, 1], ready for sRGB OETF + // - in HDR mode: mapped to [0, framebufferLuminanceTarget_], ready for PQ inverse-EOTF + // Note: framebufferLuminanceTarget_ represents the display's target peak luminance converted to a frame buffer value. + // The returned values are suitable for applying the appropriate OETF to generate final output signal. + void applyToneMapping(f32x3 rgb, out f32x3 out) + { + // Convert to UCS to separate luminance and chroma. + f32x3 ucs; + rgbToUcs(rgb, ucs); + + // Per-channel tone mapping ("skewed" color). + f32x3 skewedRgb = { curve_.evaluateCurve(rgb[0]), + curve_.evaluateCurve(rgb[1]), + curve_.evaluateCurve(rgb[2]) }; + + f32x3 skewedUcs; + rgbToUcs(skewedRgb, skewedUcs); + + float chromaScale = + chromaCurve(ucs[0] / framebufferLuminanceTargetUcs_, fadeStart_, fadeEnd_); + + const f32x3 scaledUcs = { skewedUcs[0], // Luminance from skewed color + ucs[1] * chromaScale, // Scaled chroma components + ucs[2] * chromaScale }; + + // Convert back to RGB. + f32x3 scaledRgb; + ucsToRgb(scaledUcs, scaledRgb); + + // Final blend between per-channel and UCS-scaled results. + for (int i = 0; i < 3; ++i) + { + float blended = (1.0f - blendRatio_) * skewedRgb[i] + blendRatio_ * scaledRgb[i]; + // When using SDR, apply the correction factor. + // When using HDR, sdrCorrectionFactor_ is 1.0f, so it has no effect. 
+ out[i] = sdrCorrectionFactor_ * min(blended, framebufferLuminanceTarget_); + } + } +}; + [[shader("fragment")]] f32x4 fs_main(VertexOutput input) { f32x3 color = params.input_image.sample_mip(params.sampler, input.tex_coord, 0.0).rgb; @@ -157,7 +561,11 @@ f32x4 fs_main(VertexOutput input) { //color = ACES_Film(color); //color = ACES_Fitted(color); //color = PBRNeutralToneMapping(color); // this looks like shit, figure out why - color = agx_tonemapping(color); + // color = agx_tonemapping(color); + GT7ToneMapping gt7; + gt7.initializeAsSDR(); + f32x3 gt7_color; + gt7.applyToneMapping(color, gt7_color); - return f32x4(color, 1.0); + return f32x4(gt7_color, 1.0); } diff --git a/Lorr/Engine/Resources/shaders/sky.slang b/Lorr/Engine/Resources/shaders/sky.slang index 6c65e443..e0d156bc 100644 --- a/Lorr/Engine/Resources/shaders/sky.slang +++ b/Lorr/Engine/Resources/shaders/sky.slang @@ -242,9 +242,7 @@ public func integrate_single_scattered_luminance( let medium_info = MediumScattering(environment, altitude); f32x3 up_vec = normalize(step_pos); - f32x3 up_vec_scaled = PLANET_RADIUS_OFFSET * up_vec; - f32 earth_shadow = std::ray_sphere_intersect_nearest( - step_pos - up_vec_scaled, info.sun_dir, environment.atmos_planet_radius).hasValue ? 0.0 : 1.0; + f32 earth_shadow = std::ray_sphere_intersect_nearest(step_pos, info.sun_dir, environment.atmos_planet_radius).hasValue ? 
0.0 : 1.0; f32 sun_theta = dot(info.sun_dir, up_vec); f32x2 transmittance_uv = transmittance_params_to_lut_uv( @@ -275,6 +273,10 @@ public func integrate_single_scattered_luminance( f32x3 integral = (sun_luminance - sun_luminance * step_transmittance) / medium_info.extinction_sum; f32x3 ms_integral = (medium_info.scattering_sum - medium_info.scattering_sum * step_transmittance) / medium_info.extinction_sum; + let extinction_zero = medium_info.extinction_sum == f32x3(0.0); + integral = select(extinction_zero, f32x3(0.0), integral); + ms_integral = select(extinction_zero, f32x3(0.0), ms_integral); + result.luminance += info.sun_intensity * (integral * result.transmittance); result.multiscattering_as_1 += ms_integral * result.transmittance; result.transmittance *= step_transmittance; diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 2b9c98af..a5eb978c 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -76,13 +76,13 @@ struct Environment { alignas(4) glm::vec3 sun_direction = {}; alignas(4) f32 sun_intensity = 10.0f; // Atmosphere - alignas(4) glm::vec3 atmos_rayleigh_scatter = { 0.005802f, 0.013558f, 0.033100f }; + alignas(4) glm::vec3 atmos_rayleigh_scatter = { 0.005802f, 0.014338f, 0.032800f }; alignas(4) f32 atmos_rayleigh_density = 8.0f; alignas(4) glm::vec3 atmos_mie_scatter = { 0.003996f, 0.003996f, 0.003996f }; alignas(4) f32 atmos_mie_density = 1.2f; alignas(4) f32 atmos_mie_extinction = 0.004440f; - alignas(4) f32 atmos_mie_asymmetry = 3.6f; - alignas(4) glm::vec3 atmos_ozone_absorption = { 0.000650f, 0.001881f, 0.000085f }; + alignas(4) f32 atmos_mie_asymmetry = 3.5f; + alignas(4) glm::vec3 atmos_ozone_absorption = { 0.000650f, 0.001781f, 0.000085f }; alignas(4) f32 atmos_ozone_height = 25.0f; alignas(4) f32 atmos_ozone_thickness = 15.0f; alignas(4) glm::vec3 atmos_terrain_albedo = { 0.3f, 0.3f, 0.3f }; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 2e79f8bf..a1a718c6 
100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -218,6 +218,7 @@ static auto json_to_entity(Scene &self, flecs::entity root, simdjson::ondemand:: std::visit( ls::match{ [](const auto &) {}, + [&](bool *v) { *v = member_json.get_bool().value_unsafe(); }, [&](f32 *v) { *v = static_cast(member_json.get_double().value_unsafe()); }, [&](i32 *v) { *v = static_cast(member_json.get_int64().value_unsafe()); }, [&](u32 *v) { *v = member_json.get_uint64().value_unsafe(); }, @@ -344,6 +345,7 @@ auto entity_to_json(JsonWriter &json, flecs::entity root) -> void { std::visit( ls::match{ [](const auto &) {}, + [&](bool *v) { member_json = *v; }, [&](f32 *v) { member_json = *v; }, [&](i32 *v) { member_json = *v; }, [&](u32 *v) { member_json = *v; }, @@ -484,8 +486,10 @@ auto Scene::create_model_entity(this Scene &self, UUID &importing_model_uuid) -> node_entity.set(transform_comp); if (cur_node.mesh_index.has_value()) { - node_entity.set({ .model_uuid = importing_model_uuid, - .mesh_index = static_cast(cur_node.mesh_index.value()) }); + node_entity.set({ + .model_uuid = importing_model_uuid, + .mesh_index = static_cast(cur_node.mesh_index.value()), + }); } node_entity.child_of(root); @@ -658,9 +662,8 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared c.frustum_projection_view_mat = camera_data.projection_view_mat; }); - ls::option environment_data = ls::nullopt; - environment_query.each([&environment_data](flecs::entity, ECS::Environment &environment_comp) { - auto &environment = environment_data.emplace(GPU::Environment{}); + GPU::Environment environment = {}; + environment_query.each([&environment](flecs::entity, ECS::Environment &environment_comp) { environment.flags |= environment_comp.sun ? GPU::EnvironmentFlags::HasSun : 0; environment.flags |= environment_comp.atmos ? GPU::EnvironmentFlags::HasAtmosphere : 0; environment.flags |= environment_comp.eye_adaptation ? 
GPU::EnvironmentFlags::HasEyeAdaptation : 0; @@ -688,6 +691,21 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared environment.eye_ISO_K = environment_comp.eye_iso / environment_comp.eye_k; }); + auto regenerate_sky = false; + regenerate_sky |= self.last_environment.atmos_rayleigh_scatter != environment.atmos_rayleigh_scatter; + regenerate_sky |= self.last_environment.atmos_rayleigh_density != environment.atmos_rayleigh_density; + regenerate_sky |= self.last_environment.atmos_mie_scatter != environment.atmos_mie_scatter; + regenerate_sky |= self.last_environment.atmos_mie_density != environment.atmos_mie_density; + regenerate_sky |= self.last_environment.atmos_mie_extinction != environment.atmos_mie_extinction; + regenerate_sky |= self.last_environment.atmos_mie_asymmetry != environment.atmos_mie_asymmetry; + regenerate_sky |= self.last_environment.atmos_ozone_absorption != environment.atmos_ozone_absorption; + regenerate_sky |= self.last_environment.atmos_ozone_height != environment.atmos_ozone_height; + regenerate_sky |= self.last_environment.atmos_ozone_thickness != environment.atmos_ozone_thickness; + regenerate_sky |= self.last_environment.atmos_terrain_albedo != environment.atmos_terrain_albedo; + regenerate_sky |= self.last_environment.atmos_atmos_radius != environment.atmos_atmos_radius; + regenerate_sky |= self.last_environment.atmos_planet_radius != environment.atmos_planet_radius; + self.last_environment = environment; + auto max_meshlet_instance_count = 0_u32; auto gpu_meshes = std::vector(); auto gpu_mesh_instances = std::vector(); @@ -774,13 +792,14 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared auto prepare_info = FramePrepareInfo{ .mesh_instance_count = self.mesh_instance_count, .max_meshlet_instance_count = self.max_meshlet_instance_count, + .regenerate_sky = regenerate_sky, .dirty_transform_ids = self.dirty_transforms, .gpu_transforms = self.transforms.slots_unsafe(), 
.dirty_material_indices = dirty_material_indices, .gpu_materials = gpu_materials, .gpu_meshes = gpu_meshes, .gpu_mesh_instances = gpu_mesh_instances, - .environment = environment_data.value_or(GPU::Environment{}), + .environment = environment, .camera = active_camera_data.value_or(GPU::Camera{}), }; auto prepared_frame = renderer.prepare_frame(prepare_info); diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index 74848e93..6fc762ae 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -56,6 +56,8 @@ private: GPU::CullFlags cull_flags = GPU::CullFlags::All; + GPU::Environment last_environment = {}; + public: auto init(this Scene &, const std::string &name) -> bool; auto destroy(this Scene &) -> void; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 9c4fff7a..ef53bcec 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -17,7 +17,6 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { auto &device = App::mod(); auto &bindless_descriptor_set = device.get_descriptor_set(); - auto &transfer_man = device.transfer_man(); auto &asset_man = App::mod(); auto shaders_root = asset_man.asset_root_path(AssetType::Shader); @@ -182,58 +181,6 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { }; Pipeline::create(device, default_slang_session, hiz_pipeline_info).value(); - // ── SKY LUTS ──────────────────────────────────────────────────────── - auto temp_environment_info = GPU::Environment{}; - temp_environment_info.transmittance_lut_size = self.sky_transmittance_lut_view.extent(); - temp_environment_info.multiscattering_lut_size = self.sky_multiscatter_lut_view.extent(); - auto temp_environment = transfer_man.scratch_buffer(temp_environment_info); - - auto transmittance_lut_pass = vuk::make_pass( - "transmittance lut", - [](vuk::CommandBuffer &cmd_list, // - VUK_IA(vuk::eComputeRW) dst, - VUK_BA(vuk::eComputeRead) atmos) { - cmd_list // - 
.bind_compute_pipeline("passes.sky_transmittance") - .bind_image(0, 0, dst) - .bind_buffer(0, 1, atmos) - .dispatch_invocations_per_pixel(dst); - - return std::make_tuple(dst, atmos); - } - ); - - auto transmittance_lut_attachment = self.sky_transmittance_lut_view.discard(device, "sky transmittance lut", vuk::ImageUsageFlagBits::eStorage); - std::tie(transmittance_lut_attachment, temp_environment) = - transmittance_lut_pass(std::move(transmittance_lut_attachment), std::move(temp_environment)); - - auto multiscatter_lut_pass = vuk::make_pass( - "multiscatter lut", - [](vuk::CommandBuffer &cmd_list, // - VUK_IA(vuk::eComputeSampled) sky_transmittance_lut, - VUK_IA(vuk::eComputeRW) sky_multiscatter_lut, - VUK_BA(vuk::eComputeRead) atmos) { - cmd_list // - .bind_compute_pipeline("passes.sky_multiscattering") - .bind_sampler(0, 0, { .magFilter = vuk::Filter::eLinear, .minFilter = vuk::Filter::eLinear }) - .bind_image(0, 1, sky_transmittance_lut) - .bind_buffer(0, 2, atmos) - .bind_image(0, 3, sky_multiscatter_lut) - .dispatch_invocations_per_pixel(sky_multiscatter_lut); - - return std::make_tuple(sky_transmittance_lut, sky_multiscatter_lut, atmos); - } - ); - - auto multiscatter_lut_attachment = self.sky_multiscatter_lut_view.discard(device, "sky multiscatter lut", vuk::ImageUsageFlagBits::eStorage); - std::tie(transmittance_lut_attachment, multiscatter_lut_attachment, temp_environment) = - multiscatter_lut_pass(std::move(transmittance_lut_attachment), std::move(multiscatter_lut_attachment), std::move(temp_environment)); - - transmittance_lut_attachment = transmittance_lut_attachment.as_released(vuk::eComputeSampled, vuk::DomainFlagBits::eGraphicsQueue); - multiscatter_lut_attachment = multiscatter_lut_attachment.as_released(vuk::eComputeSampled, vuk::DomainFlagBits::eGraphicsQueue); - transfer_man.wait_on(std::move(transmittance_lut_attachment)); - transfer_man.wait_on(std::move(multiscatter_lut_attachment)); - self.histogram_luminance_buffer = Buffer::create(device, 
sizeof(GPU::HistogramLuminance)).value(); vuk::fill(vuk::acquire_buf("histogram luminance", *device.buffer(self.histogram_luminance_buffer.id()), vuk::eNone), 0); @@ -382,9 +329,242 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in prepared_frame.mesh_instance_count = info.mesh_instance_count; prepared_frame.environment_flags = static_cast(info.environment.flags); + if (info.regenerate_sky || !self.sky_transmittance_lut_view) { + auto transmittance_lut_pass = vuk::make_pass( + "transmittance lut", + [](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeRW) dst, + VUK_BA(vuk::eComputeRead) environment) { + cmd_list // + .bind_compute_pipeline("passes.sky_transmittance") + .bind_image(0, 0, dst) + .bind_buffer(0, 1, environment) + .dispatch_invocations_per_pixel(dst); + + return std::make_tuple(dst, environment); + } + ); + + prepared_frame.sky_transmittance_lut = + self.sky_transmittance_lut_view.discard(device, "sky transmittance lut", vuk::ImageUsageFlagBits::eStorage); + std::tie(prepared_frame.sky_transmittance_lut, prepared_frame.environment_buffer) = + transmittance_lut_pass(std::move(prepared_frame.sky_transmittance_lut), std::move(prepared_frame.environment_buffer)); + + auto multiscatter_lut_pass = vuk::make_pass( + "multiscatter lut", + [](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeSampled) sky_transmittance_lut, + VUK_IA(vuk::eComputeRW) sky_multiscatter_lut, + VUK_BA(vuk::eComputeRead) environment) { + cmd_list // + .bind_compute_pipeline("passes.sky_multiscattering") + .bind_sampler(0, 0, { .magFilter = vuk::Filter::eLinear, .minFilter = vuk::Filter::eLinear }) + .bind_image(0, 1, sky_transmittance_lut) + .bind_buffer(0, 2, environment) + .bind_image(0, 3, sky_multiscatter_lut) + .dispatch_invocations_per_pixel(sky_multiscatter_lut); + + return std::make_tuple(sky_transmittance_lut, sky_multiscatter_lut, environment); + } + ); + + prepared_frame.sky_multiscatter_lut = + 
self.sky_multiscatter_lut_view.discard(device, "sky multiscatter lut", vuk::ImageUsageFlagBits::eStorage); + std::tie(prepared_frame.sky_transmittance_lut, prepared_frame.sky_multiscatter_lut, prepared_frame.environment_buffer) = + multiscatter_lut_pass( + std::move(prepared_frame.sky_transmittance_lut), + std::move(prepared_frame.sky_multiscatter_lut), + std::move(prepared_frame.environment_buffer) + ); + } else { + prepared_frame.sky_transmittance_lut = + self.sky_transmittance_lut_view.acquire(device, "sky transmittance lut", vuk::ImageUsageFlagBits::eSampled, vuk::Access::eComputeSampled); + prepared_frame.sky_multiscatter_lut = + self.sky_multiscatter_lut_view.acquire(device, "sky multiscatter lut", vuk::ImageUsageFlagBits::eSampled, vuk::Access::eComputeSampled); + } + return prepared_frame; } +static auto draw_sky( + SceneRenderer &self, + vuk::Value &dst_attachment, + vuk::Value &depth_attachment, + vuk::Value &sky_transmittance_lut_attachment, + vuk::Value &sky_multiscatter_lut_attachment, + vuk::Value &environment_buffer, + vuk::Value &camera_buffer +) -> void { + ZoneScoped; + + auto sky_view_lut_attachment = vuk::declare_ia( + "sky view lut", + { .image_type = vuk::ImageType::e2D, + .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .extent = self.sky_view_lut_extent, + .format = vuk::Format::eR16G16B16A16Sfloat, + .sample_count = vuk::Samples::e1, + .view_type = vuk::ImageViewType::e2D, + .level_count = 1, + .layer_count = 1 } + ); + + auto sky_aerial_perspective_attachment = vuk::declare_ia( + "sky aerial perspective", + { .image_type = vuk::ImageType::e3D, + .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .extent = self.sky_aerial_perspective_lut_extent, + .sample_count = vuk::Samples::e1, + .view_type = vuk::ImageViewType::e3D, + .level_count = 1, + .layer_count = 1 } + ); + sky_aerial_perspective_attachment.same_format_as(sky_view_lut_attachment); + + // ── SKY VIEW LUT 
──────────────────────────────────────────────────── + auto sky_view_pass = vuk::make_pass( + "sky view", + [](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeSampled) sky_transmittance_lut, + VUK_IA(vuk::eComputeSampled) sky_multiscatter_lut, + VUK_BA(vuk::eComputeRead) environment, + VUK_BA(vuk::eComputeRead) camera, + VUK_IA(vuk::eComputeRW) sky_view_lut) { + auto linear_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eLinear, + .minFilter = vuk::Filter::eLinear, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + cmd_list // + .bind_compute_pipeline("passes.sky_view") + .bind_sampler(0, 0, linear_clamp_sampler) + .bind_image(0, 1, sky_transmittance_lut) + .bind_image(0, 2, sky_multiscatter_lut) + .bind_buffer(0, 3, environment) + .bind_buffer(0, 4, camera) + .bind_image(0, 5, sky_view_lut) + .dispatch_invocations_per_pixel(sky_view_lut); + return std::make_tuple(sky_transmittance_lut, sky_multiscatter_lut, environment, camera, sky_view_lut); + } + ); + std::tie(sky_transmittance_lut_attachment, sky_multiscatter_lut_attachment, environment_buffer, camera_buffer, sky_view_lut_attachment) = + sky_view_pass( + std::move(sky_transmittance_lut_attachment), + std::move(sky_multiscatter_lut_attachment), + std::move(environment_buffer), + std::move(camera_buffer), + std::move(sky_view_lut_attachment) + ); + + // ── SKY AERIAL PERSPECTIVE ────────────────────────────────────────── + auto sky_aerial_perspective_pass = vuk::make_pass( + "sky aerial perspective", + [](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeSampled) sky_transmittance_lut, + VUK_IA(vuk::eComputeSampled) sky_multiscatter_lut, + VUK_BA(vuk::eComputeRead) environment, + VUK_BA(vuk::eComputeRead) camera, + VUK_IA(vuk::eComputeRW) sky_aerial_perspective_lut) { + auto linear_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = 
vuk::Filter::eLinear, + .minFilter = vuk::Filter::eLinear, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + cmd_list // + .bind_compute_pipeline("passes.sky_aerial_perspective") + .bind_sampler(0, 0, linear_clamp_sampler) + .bind_image(0, 1, sky_transmittance_lut) + .bind_image(0, 2, sky_multiscatter_lut) + .bind_buffer(0, 3, environment) + .bind_buffer(0, 4, camera) + .bind_image(0, 5, sky_aerial_perspective_lut) + .dispatch_invocations_per_pixel(sky_aerial_perspective_lut); + return std::make_tuple(sky_transmittance_lut, sky_multiscatter_lut, environment, camera, sky_aerial_perspective_lut); + } + ); + + std::tie( + sky_transmittance_lut_attachment, + sky_multiscatter_lut_attachment, + environment_buffer, + camera_buffer, + sky_aerial_perspective_attachment + ) = + sky_aerial_perspective_pass( + std::move(sky_transmittance_lut_attachment), + std::move(sky_multiscatter_lut_attachment), + std::move(environment_buffer), + std::move(camera_buffer), + std::move(sky_aerial_perspective_attachment) + ); + + // ── SKY FINAL ─────────────────────────────────────────────────────── + auto sky_final_pass = vuk::make_pass( + "sky final", + [](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eColorWrite) dst, + VUK_IA(vuk::eFragmentSampled) sky_transmittance_lut, + VUK_IA(vuk::eFragmentSampled) sky_aerial_perspective_lut, + VUK_IA(vuk::eFragmentSampled) sky_view_lut, + VUK_IA(vuk::eFragmentSampled) depth, + VUK_BA(vuk::eFragmentRead) environment, + VUK_BA(vuk::eFragmentRead) camera) { + auto linear_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eLinear, + .minFilter = vuk::Filter::eLinear, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + vuk::PipelineColorBlendAttachmentState blend_info = { + 
.blendEnable = true, + .srcColorBlendFactor = vuk::BlendFactor::eOne, + .dstColorBlendFactor = vuk::BlendFactor::eSrcAlpha, + .colorBlendOp = vuk::BlendOp::eAdd, + .srcAlphaBlendFactor = vuk::BlendFactor::eZero, + .dstAlphaBlendFactor = vuk::BlendFactor::eOne, + .alphaBlendOp = vuk::BlendOp::eAdd, + }; + + cmd_list // + .bind_graphics_pipeline("passes.sky_final") + .set_rasterization({}) + .set_depth_stencil({}) + .set_color_blend(dst, blend_info) + .set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor) + .set_viewport(0, vuk::Rect2D::framebuffer()) + .set_scissor(0, vuk::Rect2D::framebuffer()) + .bind_sampler(0, 0, linear_clamp_sampler) + .bind_image(0, 1, sky_transmittance_lut) + .bind_image(0, 2, sky_aerial_perspective_lut) + .bind_image(0, 3, sky_view_lut) + .bind_image(0, 4, depth) + .bind_buffer(0, 5, environment) + .bind_buffer(0, 6, camera) + .draw(3, 1, 0, 0); + + return std::make_tuple(dst, depth, environment, camera); + } + ); + + std::tie(dst_attachment, depth_attachment, environment_buffer, camera_buffer) = sky_final_pass( + std::move(dst_attachment), + std::move(sky_transmittance_lut_attachment), + std::move(sky_aerial_perspective_attachment), + std::move(sky_view_lut_attachment), + std::move(depth_attachment), + std::move(environment_buffer), + std::move(camera_buffer) + ); +} + auto SceneRenderer::render(this SceneRenderer &self, vuk::Value &&dst_attachment, SceneRenderInfo &info, PreparedFrame &frame) -> vuk::Value { ZoneScoped; @@ -412,11 +592,6 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valueextent.width + 63_u32) & ~63_u32, .height = (dst_attachment->extent.height + 63_u32) & ~63_u32, @@ -502,6 +677,8 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valuedevice_address)) + .bind_buffer(0, 0, camera) .draw(3, 1, 0, 0); return std::make_tuple(dst, depth); } ); - //std::tie(result_attachment, depth_attachment) = - // editor_grid_pass(std::move(result_attachment), 
std::move(depth_attachment), std::move(camera_buffer)); + // std::tie(dst_attachment, depth_attachment) = editor_grid_pass(std::move(dst_attachment), std::move(depth_attachment), std::move(camera_buffer)); if (debugging) { auto debug_pass = vuk::make_pass( diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index eae109e9..59748e8a 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -8,6 +8,7 @@ namespace lr { struct FramePrepareInfo { u32 mesh_instance_count = 0; u32 max_meshlet_instance_count = 0; + bool regenerate_sky = false; ls::span dirty_transform_ids = {}; ls::span gpu_transforms = {}; @@ -34,6 +35,8 @@ struct PreparedFrame { vuk::Value materials_buffer = {}; vuk::Value environment_buffer = {}; vuk::Value camera_buffer = {}; + vuk::Value sky_transmittance_lut = {}; + vuk::Value sky_multiscatter_lut = {}; }; struct SceneRenderInfo { From c659b6d2ec4e60d39c389db0b5ba918094b320d6 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sat, 16 Aug 2025 21:08:58 +0300 Subject: [PATCH 03/27] add window key events --- Lorr/Editor/EditorModule.cc | 4 +- Lorr/Engine/Graphics/ImGuiRenderer.cc | 4 +- Lorr/Engine/Window/Window.cc | 12 ++++++ Lorr/Engine/Window/Window.hh | 56 +++++++++++++++++---------- 4 files changed, 52 insertions(+), 24 deletions(-) diff --git a/Lorr/Editor/EditorModule.cc b/Lorr/Editor/EditorModule.cc index 48ce6419..e3e11ac3 100755 --- a/Lorr/Editor/EditorModule.cc +++ b/Lorr/Editor/EditorModule.cc @@ -365,7 +365,9 @@ static auto draw_welcome_popup(EditorModule &self) -> void { ZoneScoped; auto &window = lr::App::mod(); - ImGui::SetNextWindowSize({ 480.0f, 350.0f }, ImGuiCond_Appearing); + auto center_window = glm::vec2(window.get_size()) * 0.5f; + ImGui::SetNextWindowSize({ 480.0f, 350.0f }, ImGuiCond_Always); + ImGui::SetNextWindowPos({ center_window.x, center_window.y }, ImGuiCond_Always, { 0.5f, 0.5f }); constexpr auto popup_flags = 
ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoSavedSettings; if (ImGui::BeginPopupModal("###welcome", nullptr, popup_flags)) { // ── HEADERS ───────────────────────────────────────────────────────── diff --git a/Lorr/Engine/Graphics/ImGuiRenderer.cc b/Lorr/Engine/Graphics/ImGuiRenderer.cc index e4012116..5ecdbb69 100644 --- a/Lorr/Engine/Graphics/ImGuiRenderer.cc +++ b/Lorr/Engine/Graphics/ImGuiRenderer.cc @@ -272,8 +272,8 @@ auto ImGuiRenderer::end_frame(this ImGuiRenderer &self, vuk::Value void { auto &device = App::mod(); device.wait(); + self.width = e.window.data1; + self.height = e.window.data2; + auto surface = self.get_surface(device.get_instance()); self.swap_chain = device.create_swap_chain(surface, std::move(self.swap_chain)).value(); } break; + case SDL_EVENT_KEY_DOWN: { + auto state = KeyState::Up; + state |= e.key.down ? KeyState::Down : KeyState::Up; + state |= e.key.repeat ? KeyState::Repeat : KeyState::Up; + self.key_events.try_emplace(e.key.scancode, e.key.key, e.key.mod, state); + } break; + case SDL_EVENT_KEY_UP: { + self.key_events.try_emplace(e.key.scancode, e.key.key, e.key.mod, KeyState::Up); + } break; case SDL_EVENT_QUIT: { App::close(); } break; diff --git a/Lorr/Engine/Window/Window.hh b/Lorr/Engine/Window/Window.hh index 9129490c..945281d6 100644 --- a/Lorr/Engine/Window/Window.hh +++ b/Lorr/Engine/Window/Window.hh @@ -8,6 +8,40 @@ #include namespace lr { +enum class DialogKind : u32 { + OpenFile = 0, + SaveFile, + OpenFolder, +}; + +struct FileDialogFilter { + std::string_view name = {}; + std::string_view pattern = {}; +}; + +struct ShowDialogInfo { + DialogKind kind = DialogKind::OpenFile; + void *user_data = nullptr; + std::string_view title = {}; + fs::path spawn_path = {}; + ls::span filters = {}; + bool multi_select = false; + void (*callback)(void *user_data, const c8 *const *files, i32 filter) = nullptr; +}; + +enum class KeyState : u32 { + Up = 0, + Down = 1 << 0, + Repeat = 1 << 1, +}; 
+consteval void enable_bitmask(KeyState); + +struct KeyEvent { + SDL_Keycode key = {}; + SDL_Keymod mod = {}; + KeyState state = KeyState::Up; +}; + enum class WindowCursor { Arrow, TextInput, @@ -42,27 +76,6 @@ struct SystemDisplay { f32 refresh_rate = 30.0f; }; -enum class DialogKind : u32 { - OpenFile = 0, - SaveFile, - OpenFolder, -}; - -struct FileDialogFilter { - std::string_view name = {}; - std::string_view pattern = {}; -}; - -struct ShowDialogInfo { - DialogKind kind = DialogKind::OpenFile; - void *user_data = nullptr; - std::string_view title = {}; - fs::path spawn_path = {}; - ls::span filters = {}; - bool multi_select = false; - void (*callback)(void *user_data, const c8 *const *files, i32 filter) = nullptr; -}; - struct WindowInfo { std::string title = {}; SystemDisplay *display = nullptr; @@ -87,6 +100,7 @@ struct Window { glm::uvec2 cursor_position = {}; std::array cursors = {}; std::vector> event_listeners = {}; + ankerl::unordered_dense::map key_events = {}; static auto init_sdl() -> bool; static auto display_at(i32 monitor_id) -> ls::option; From c43cea7946805e3d8644ab011eb0b9acd6ee6dd4 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Mon, 18 Aug 2025 18:04:10 +0300 Subject: [PATCH 04/27] separate editor camera from core --- Lorr/Editor/Window/SceneBrowserWindow.cc | 1 - Lorr/Editor/Window/ViewportWindow.cc | 65 +++++++------------ Lorr/Editor/Window/ViewportWindow.hh | 4 ++ Lorr/Engine/Asset/Asset.cc | 2 - Lorr/Engine/Scene/ECSModule/CoreComponents.hh | 5 -- Lorr/Engine/Scene/EditorCamera.hh | 17 +++++ Lorr/Engine/Scene/Scene.cc | 45 ++----------- Lorr/Engine/Scene/Scene.hh | 11 ++-- Lorr/Engine/Scene/SceneRenderer.cc | 1 - Lorr/Runtime/RuntimeModule.cc | 7 +- 10 files changed, 60 insertions(+), 98 deletions(-) create mode 100644 Lorr/Engine/Scene/EditorCamera.hh diff --git a/Lorr/Editor/Window/SceneBrowserWindow.cc b/Lorr/Editor/Window/SceneBrowserWindow.cc index 72140321..516ecb67 100755 --- 
a/Lorr/Editor/Window/SceneBrowserWindow.cc +++ b/Lorr/Editor/Window/SceneBrowserWindow.cc @@ -25,7 +25,6 @@ static auto draw_children(SceneBrowserWindow &self, flecs::entity root) -> void auto q = world // .query_builder() .with(flecs::ChildOf, root) - .without() .build(); ImGui::TableNextRow(); diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index eeca6933..064d90e7 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -124,26 +124,19 @@ static auto draw_tools(ViewportWindow &self) -> void { ImGui::SetNextWindowPos(editor_camera_popup_pos, ImGuiCond_Appearing); ImGui::SetNextWindowSize({ editor_camera_popup_width, 0 }, ImGuiCond_Appearing); if (ImGui::BeginPopup("editor_camera")) { - auto editor_camera = active_scene->get_editor_camera(); - auto *camera_transform = editor_camera.get_mut(); - auto *camera_info = editor_camera.get_mut(); - ImGui::SeparatorText("Position"); - ImGui::drag_vec(0, glm::value_ptr(camera_transform->position), 3, ImGuiDataType_Float); + ImGui::drag_vec(0, glm::value_ptr(self.editor_camera.position), 3, ImGuiDataType_Float); ImGui::SeparatorText("Rotation"); - auto camera_rotation_degrees = glm::degrees(camera_transform->rotation); + auto camera_rotation_degrees = glm::degrees(self.editor_camera.rotation); ImGui::drag_vec(1, glm::value_ptr(camera_rotation_degrees), 3, ImGuiDataType_Float); - camera_transform->rotation = glm::radians(lr::Math::normalize_180(camera_rotation_degrees)); + self.editor_camera.rotation = glm::radians(lr::Math::normalize_180(camera_rotation_degrees)); ImGui::SeparatorText("FoV"); - ImGui::drag_vec(2, &camera_info->fov, 1, ImGuiDataType_Float); + ImGui::drag_vec(2, &self.editor_camera.fov, 1, ImGuiDataType_Float); ImGui::SeparatorText("Far Clip"); - ImGui::drag_vec(3, &camera_info->far_clip, 1, ImGuiDataType_Float); - - ImGui::SeparatorText("Velocity"); - ImGui::drag_vec(4, &camera_info->velocity_mul, 1, ImGuiDataType_Float); + 
ImGui::drag_vec(3, &self.editor_camera.far_clip, 1, ImGuiDataType_Float); ImGui::EndPopup(); } @@ -168,7 +161,6 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 auto *active_scene = asset_man.get_scene(active_project.active_scene_uuid); auto &selected_entity = active_project.selected_entity; - auto editor_camera = active_scene->get_editor_camera(); auto *current_window = ImGui::GetCurrentWindow(); auto window_rect = current_window->InnerRect; auto window_pos = window_rect.Min; @@ -176,11 +168,6 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 auto work_area_size = ImGui::GetContentRegionAvail(); auto &io = ImGui::GetIO(); - auto *camera = editor_camera.get_mut(); - auto *camera_transform = editor_camera.get_mut(); - camera->resolution = { window_size.x, window_size.y }; - camera->aspect_ratio = window_size.x / window_size.y; - ImGuizmo::SetDrawlist(); ImGuizmo::SetOrthographic(false); ImGuizmo::SetRect(window_pos.x, window_pos.y, window_size.x, window_size.y); @@ -200,7 +187,8 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 requested_texel_transform.emplace(glm::uvec2(mouse_pos_rel.x, mouse_pos_rel.y)); } - auto prepared_frame = active_scene->prepare_frame(scene_renderer); + self.editor_camera.resolution = { window_size.x, window_size.y }; + auto prepared_frame = active_scene->prepare_frame(scene_renderer, self.editor_camera); // NOLINT(cppcoreguidelines-slicing) auto viewport_attachment_info = vuk::ImageAttachment{ .image_type = vuk::ImageType::e2D, @@ -234,9 +222,14 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 } if (selected_entity && selected_entity.has()) { - auto camera_forward = glm::vec3(0.0, 0.0, 1.0) * lr::Math::quat_dir(camera_transform->rotation); - auto camera_projection = glm::perspective(glm::radians(camera->fov), camera->aspect_ratio, camera->far_clip, camera->near_clip); - auto camera_view = 
glm::lookAt(camera_transform->position, camera_transform->position + camera_forward, glm::vec3(0.0, 1.0, 0.0)); + auto camera_forward = glm::vec3(0.0, 0.0, 1.0) * lr::Math::quat_dir(self.editor_camera.rotation); + auto camera_projection = glm::perspective( + glm::radians(self.editor_camera.fov), + self.editor_camera.aspect_ratio(), + self.editor_camera.far_clip, + self.editor_camera.near_clip + ); + auto camera_view = glm::lookAt(self.editor_camera.position, self.editor_camera.position + camera_forward, glm::vec3(0.0, 1.0, 0.0)); camera_projection[1][1] *= -1.0f; auto *transform = selected_entity.get_mut(); @@ -277,35 +270,21 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 // ── CAMERA CONTROLLER ─────────────────────────────────────────────── if (!ImGuizmo::IsUsingAny() && ImGui::IsWindowHovered()) { - bool reset_z = false; - bool reset_x = false; - + constexpr auto EDITOR_CAMERA_VELOCITY = 2.0f; if (ImGui::IsKeyDown(ImGuiKey_W)) { - camera->axis_velocity.z = -camera->velocity_mul; - reset_z |= true; + self.editor_camera.velocity.z = -EDITOR_CAMERA_VELOCITY; } if (ImGui::IsKeyDown(ImGuiKey_S)) { - camera->axis_velocity.z = camera->velocity_mul; - reset_z |= true; + self.editor_camera.velocity.z = EDITOR_CAMERA_VELOCITY; } if (ImGui::IsKeyDown(ImGuiKey_A)) { - camera->axis_velocity.x = -camera->velocity_mul; - reset_x |= true; + self.editor_camera.velocity.x = -EDITOR_CAMERA_VELOCITY; } if (ImGui::IsKeyDown(ImGuiKey_D)) { - camera->axis_velocity.x = camera->velocity_mul; - reset_x |= true; - } - - if (!reset_z) { - camera->axis_velocity.z = 0.0; - } - - if (!reset_x) { - camera->axis_velocity.x = 0.0; + self.editor_camera.velocity.x = EDITOR_CAMERA_VELOCITY; } if (ImGui::IsMouseDragging(ImGuiMouseButton_Right)) { @@ -313,10 +292,10 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 ImGui::ResetMouseDragDelta(ImGuiMouseButton_Right); auto sensitivity = 0.1f; - auto camera_rotation_degrees = 
glm::degrees(camera_transform->rotation); + auto camera_rotation_degrees = glm::degrees(self.editor_camera.rotation); camera_rotation_degrees.x += drag.x * sensitivity; camera_rotation_degrees.y += drag.y * sensitivity; - camera_transform->rotation = glm::radians(camera_rotation_degrees); + self.editor_camera.rotation = glm::radians(camera_rotation_degrees); } } } diff --git a/Lorr/Editor/Window/ViewportWindow.hh b/Lorr/Editor/Window/ViewportWindow.hh index d2040100..cf7232e7 100644 --- a/Lorr/Editor/Window/ViewportWindow.hh +++ b/Lorr/Editor/Window/ViewportWindow.hh @@ -1,10 +1,14 @@ #pragma once #include "Editor/Window/IWindow.hh" +#include "Engine/Scene/EditorCamera.hh" + +#include namespace led { struct ViewportWindow : IWindow { u32 gizmo_op = 0; + lr::EditorCamera editor_camera = {}; ViewportWindow(std::string name_, bool open_ = true); diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index dd430904..71691ccf 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -252,8 +252,6 @@ auto AssetManager::init_new_scene(this AssetManager &self, const UUID &uuid, con return false; } - scene->create_editor_camera(); - asset->acquire_ref(); return true; } diff --git a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh index 9821a390..80e1137a 100644 --- a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh +++ b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh @@ -24,7 +24,6 @@ ECS_COMPONENT_END(); ECS_COMPONENT_BEGIN(Camera) ECS_COMPONENT_MEMBER(fov, f32, 90.0f) ECS_COMPONENT_MEMBER(resolution, glm::vec2, {}) - ECS_COMPONENT_MEMBER(aspect_ratio, f32, 1.777f) ECS_COMPONENT_MEMBER(near_clip, f32, 0.1f) ECS_COMPONENT_MEMBER(far_clip, f32, 1000.0f) ECS_COMPONENT_MEMBER(axis_velocity, glm::vec3, { 0.0, 0.0, 0.0 }) @@ -36,7 +35,6 @@ ECS_COMPONENT_END(); ECS_COMPONENT_TAG(PerspectiveCamera); ECS_COMPONENT_TAG(OrthographicCamera); ECS_COMPONENT_TAG(ActiveCamera); -ECS_COMPONENT_TAG(EditorCamera); 
ECS_COMPONENT_BEGIN(RenderingMesh) ECS_COMPONENT_MEMBER(model_uuid, UUID, {}) @@ -66,7 +64,4 @@ ECS_COMPONENT_BEGIN(Environment) ECS_COMPONENT_MEMBER(eye_k, f32, 12.5f) ECS_COMPONENT_END(); -// Any entity with this tag won't be serialized -ECS_COMPONENT_TAG(Hidden); - // clang-format on diff --git a/Lorr/Engine/Scene/EditorCamera.hh b/Lorr/Engine/Scene/EditorCamera.hh new file mode 100644 index 00000000..e701e299 --- /dev/null +++ b/Lorr/Engine/Scene/EditorCamera.hh @@ -0,0 +1,17 @@ +#pragma once + +#include "Engine/Scene/GPUScene.hh" + +namespace lr { +struct EditorCamera : GPU::Camera { + glm::vec3 position = {}; + glm::vec3 rotation = {}; + glm::vec3 velocity = {}; + f32 fov = 65.0f; + + auto aspect_ratio() -> f32 { + return resolution.x / resolution.y; + } +}; + +} // namespace lr diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index a1a718c6..13ed6d3a 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -98,17 +98,6 @@ auto Scene::init(this Scene &self, const std::string &name) -> bool { } }); - self.world->observer() - .event(flecs::Monitor) // - .each([&self](flecs::iter &it, usize i, ECS::EditorCamera) { - auto entity = it.entity(i); - if (it.event() == flecs::OnAdd) { - self.editor_camera = entity; - } else if (it.event() == flecs::OnRemove) { - self.editor_camera.clear(); - } - }); - self.world ->system() // .each([&](flecs::iter &it, usize, ECS::Transform &t, ECS::Camera &c) { @@ -417,30 +406,15 @@ auto Scene::delete_entity(this Scene &, flecs::entity entity) -> void { entity.destruct(); } -auto Scene::create_perspective_camera( - this Scene &self, - const std::string &name, - const glm::vec3 &position, - const glm::vec3 &rotation, - f32 fov, - f32 aspect_ratio -) -> flecs::entity { +auto Scene::create_perspective_camera(this Scene &self, const std::string &name, const glm::vec3 &position, const glm::vec3 &rotation, f32 fov) + -> flecs::entity { ZoneScoped; return self .create_entity(name) // .add() .set({ 
.position = position, .rotation = glm::radians(Math::normalize_180(rotation)) }) - .set({ .fov = fov, .aspect_ratio = aspect_ratio }); -} - -auto Scene::create_editor_camera(this Scene &self) -> void { - ZoneScoped; - - self.create_perspective_camera("editor_camera", { 0.0, 2.0, 0.0 }, { 0, 0, 0 }, 65.0, 1.6) - .add() - .add() - .add() + .set({ .fov = fov }) .child_of(self.root); } @@ -595,12 +569,6 @@ auto Scene::get_world(this Scene &self) -> flecs::world & { return self.world.value(); } -auto Scene::get_editor_camera(this Scene &self) -> flecs::entity { - ZoneScoped; - - return self.editor_camera; -} - auto Scene::get_name(this Scene &self) -> const std::string & { ZoneScoped; @@ -621,7 +589,7 @@ auto Scene::get_cull_flags(this Scene &self) -> GPU::CullFlags & { return self.cull_flags; } -auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> PreparedFrame { +auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, GPU::Camera fallback_camera) -> PreparedFrame { ZoneScoped; auto &asset_man = App::mod(); @@ -640,7 +608,8 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared ls::option active_camera_data = ls::nullopt; camera_query.each([&active_camera_data](flecs::entity, ECS::Transform &t, ECS::Camera &c, ECS::ActiveCamera) { - auto projection_mat = glm::perspectiveRH_ZO(glm::radians(c.fov), c.aspect_ratio, c.far_clip, c.near_clip); + auto aspect_ratio = c.resolution.x / c.resolution.y; + auto projection_mat = glm::perspectiveRH_ZO(glm::radians(c.fov), aspect_ratio, c.far_clip, c.near_clip); projection_mat[1][1] *= -1; auto translation_mat = glm::translate(glm::mat4(1.0f), -t.position); @@ -800,7 +769,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared .gpu_meshes = gpu_meshes, .gpu_mesh_instances = gpu_mesh_instances, .environment = environment, - .camera = active_camera_data.value_or(GPU::Camera{}), + .camera = active_camera_data.value_or(fallback_camera), }; auto 
prepared_frame = renderer.prepare_frame(prepare_info); diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index 6fc762ae..bcc1d6ef 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -43,7 +43,6 @@ private: flecs::entity root = {}; ls::option world = ls::nullopt; SceneEntityDB entity_db = {}; - flecs::entity editor_camera = {}; SlotMap transforms = {}; ankerl::unordered_dense::map entity_transforms_map = {}; @@ -67,10 +66,8 @@ public: auto create_entity(this Scene &, const std::string &name = {}) -> flecs::entity; auto delete_entity(this Scene &, flecs::entity entity) -> void; - // clang-format off - auto create_perspective_camera(this Scene &, const std::string &name, const glm::vec3 &position, const glm::vec3 &rotation, f32 fov, f32 aspect_ratio) -> flecs::entity; - // clang-format on - auto create_editor_camera(this Scene &) -> void; + auto create_perspective_camera(this Scene &, const std::string &name, const glm::vec3 &position, const glm::vec3 &rotation, f32 fov) + -> flecs::entity; // Model = collection of meshes. // This function imports every mesh inside the model asset. 
// The returning entity is a parent, "model" entity where each of @@ -81,7 +78,8 @@ public: auto find_entity(this Scene &, std::string_view name) -> flecs::entity; auto find_entity(this Scene &, u32 transform_index) -> flecs::entity; - auto prepare_frame(this Scene &, SceneRenderer &renderer) -> PreparedFrame; + // If we really want to render something, camera needs to be there + auto prepare_frame(this Scene &, SceneRenderer &renderer, GPU::Camera fallback_camera = {}) -> PreparedFrame; auto tick(this Scene &, f32 delta_time) -> bool; auto set_name(this Scene &, const std::string &name) -> void; @@ -89,7 +87,6 @@ public: auto get_root(this Scene &) -> flecs::entity; auto get_world(this Scene &) -> flecs::world &; - auto get_editor_camera(this Scene &) -> flecs::entity; auto get_name(this Scene &) -> const std::string &; auto get_name_sv(this Scene &) -> std::string_view; auto get_entity_db(this Scene &) -> SceneEntityDB &; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index ef53bcec..3ffc6a49 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -174,7 +174,6 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { }; Pipeline::create(device, default_slang_session, copy_pipeline_info).value(); - // ── FFX ───────────────────────────────────────────────────────────── auto hiz_pipeline_info = PipelineCompileInfo{ .module_name = "passes.hiz", .entry_points = { "cs_main" }, diff --git a/Lorr/Runtime/RuntimeModule.cc b/Lorr/Runtime/RuntimeModule.cc index 8b60dbfa..74e3e64c 100644 --- a/Lorr/Runtime/RuntimeModule.cc +++ b/Lorr/Runtime/RuntimeModule.cc @@ -7,6 +7,12 @@ #include "Engine/Scene/ECSModule/Core.hh" #include "Engine/Window/Window.hh" +struct Runtime { + Runtime(flecs::world &world) { + world.component(""); + } +}; + auto RuntimeModule::init(this RuntimeModule &self) -> bool { LOG_TRACE("Actvie world: {}", self.world_path); @@ -36,7 +42,6 @@ auto RuntimeModule::update(this 
RuntimeModule &self, f64 delta_time) -> void { camera_query.each([&window](flecs::entity, lr::ECS::Camera &c, lr::ECS::ActiveCamera) { c.resolution = glm::vec2(window.width, window.height); - c.aspect_ratio = c.resolution.x / c.resolution.y; }); active_scene->tick(static_cast(delta_time)); From 2f01427aefc620d8b1c4f298f48784438b372aeb Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Mon, 18 Aug 2025 22:33:23 +0300 Subject: [PATCH 05/27] editor camera matrix --- Lorr/Editor/Window/ViewportWindow.cc | 33 ++++++++++++++++++++++------ Lorr/Engine/Scene/EditorCamera.hh | 1 - Lorr/Engine/Scene/GPUScene.hh | 4 ++-- Lorr/Engine/Scene/Scene.cc | 2 +- xmake/packages.lua | 2 +- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index 064d90e7..25eede6d 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -9,6 +9,8 @@ #include "Engine/Asset/Asset.hh" #include "Engine/Core/App.hh" +#include "Engine/Math/Quat.hh" + #include #include @@ -187,7 +189,29 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 requested_texel_transform.emplace(glm::uvec2(mouse_pos_rel.x, mouse_pos_rel.y)); } - self.editor_camera.resolution = { window_size.x, window_size.y }; + { + // Update editor camera + self.editor_camera.resolution = { window_size.x, window_size.y }; + auto projection_mat = glm::perspective( + glm::radians(self.editor_camera.fov), + self.editor_camera.aspect_ratio(), + self.editor_camera.far_clip, + self.editor_camera.near_clip + ); + projection_mat[1][1] *= -1.0f; + + auto translation_mat = glm::translate(glm::mat4(1.0f), -self.editor_camera.position); + auto rotation_mat = glm::mat4_cast(lr::Math::quat_dir(self.editor_camera.rotation)); + auto view_mat = rotation_mat * translation_mat; + auto projection_view_mat = projection_mat * view_mat; + self.editor_camera.projection_mat = 
projection_mat; + self.editor_camera.view_mat = view_mat; + self.editor_camera.projection_view_mat = projection_mat * view_mat; + self.editor_camera.inv_view_mat = glm::inverse(view_mat); + self.editor_camera.inv_projection_view_mat = glm::inverse(projection_view_mat); + self.editor_camera.acceptable_lod_error = 2.0f; + } + auto prepared_frame = active_scene->prepare_frame(scene_renderer, self.editor_camera); // NOLINT(cppcoreguidelines-slicing) auto viewport_attachment_info = vuk::ImageAttachment{ @@ -223,12 +247,7 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 if (selected_entity && selected_entity.has()) { auto camera_forward = glm::vec3(0.0, 0.0, 1.0) * lr::Math::quat_dir(self.editor_camera.rotation); - auto camera_projection = glm::perspective( - glm::radians(self.editor_camera.fov), - self.editor_camera.aspect_ratio(), - self.editor_camera.far_clip, - self.editor_camera.near_clip - ); + auto camera_projection = glm::perspective(glm::radians(self.editor_camera.fov), self.editor_camera.aspect_ratio(), self.editor_camera.far_clip, self.editor_camera.near_clip); auto camera_view = glm::lookAt(self.editor_camera.position, self.editor_camera.position + camera_forward, glm::vec3(0.0, 1.0, 0.0)); camera_projection[1][1] *= -1.0f; diff --git a/Lorr/Engine/Scene/EditorCamera.hh b/Lorr/Engine/Scene/EditorCamera.hh index e701e299..767e28be 100644 --- a/Lorr/Engine/Scene/EditorCamera.hh +++ b/Lorr/Engine/Scene/EditorCamera.hh @@ -4,7 +4,6 @@ namespace lr { struct EditorCamera : GPU::Camera { - glm::vec3 position = {}; glm::vec3 rotation = {}; glm::vec3 velocity = {}; f32 fov = 65.0f; diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index a5eb978c..554bb063 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -113,8 +113,8 @@ struct Camera { alignas(4) glm::mat4 inv_projection_view_mat = {}; alignas(4) glm::mat4 frustum_projection_view_mat = {}; alignas(4) glm::vec3 position = {}; - 
alignas(4) f32 near_clip = {}; - alignas(4) f32 far_clip = {}; + alignas(4) f32 near_clip = 0.01f; + alignas(4) f32 far_clip = 1000.0f; alignas(4) glm::vec2 resolution = {}; alignas(4) f32 acceptable_lod_error = 0.0f; }; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 13ed6d3a..6ad8e450 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -190,7 +190,7 @@ static auto json_to_entity(Scene &self, flecs::entity root, simdjson::ondemand:: auto component_id = world.lookup(component_name); if (!component_id) { LOG_ERROR("Entity '{}' has invalid component named '{}'!", e.name(), component_name); - return false; + continue; } LS_EXPECT(self.get_entity_db().is_component_known(component_id)); diff --git a/xmake/packages.lua b/xmake/packages.lua index 328b427a..7c01f1e9 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -60,6 +60,6 @@ add_requires("vuk 2025.07.09", { configs = { }, debug = is_mode("debug") }) add_requires("meshoptimizer v0.24") -add_requires("ktx v4.4.0") +add_requires("ktx v4.4.0", { debug = false }) add_requires("svector v1.0.3") From 2b499713a9ec3aa3c6f894b4bab35f4ae028a770 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 19 Aug 2025 14:42:07 +0300 Subject: [PATCH 06/27] smooth editor camera --- Lorr/Editor/Window/ViewportWindow.cc | 83 +++++++++++-------- .../Resources/shaders/passes/tonemap.slang | 12 +-- Lorr/Engine/Scene/Scene.cc | 54 ++++++------ Lorr/Engine/Scene/Scene.hh | 2 +- 4 files changed, 84 insertions(+), 67 deletions(-) diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index 25eede6d..3af8b0d0 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -169,6 +169,7 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 auto window_size = window_rect.GetSize(); auto work_area_size = ImGui::GetContentRegionAvail(); auto &io = ImGui::GetIO(); 
+ auto delta_time = io.DeltaTime; ImGuizmo::SetDrawlist(); ImGuizmo::SetOrthographic(false); @@ -190,9 +191,48 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 } { + constexpr auto EDITOR_CAMERA_MAX_VELOCITY = 2.0f; + constexpr auto EDITOR_CAMERA_ACCELERATION = 8.0f; + constexpr auto EDITOR_CAMERA_DECELERATION = 12.0f; + // Update editor camera + auto target_velocity = glm::vec3(0.0f); + if (!ImGuizmo::IsUsingAny() && ImGui::IsWindowHovered()) { + if (ImGui::IsKeyDown(ImGuiKey_W)) { + target_velocity.z = -EDITOR_CAMERA_MAX_VELOCITY; + } + + if (ImGui::IsKeyDown(ImGuiKey_S)) { + target_velocity.z = EDITOR_CAMERA_MAX_VELOCITY; + } + + if (ImGui::IsKeyDown(ImGuiKey_A)) { + target_velocity.x = -EDITOR_CAMERA_MAX_VELOCITY; + } + + if (ImGui::IsKeyDown(ImGuiKey_D)) { + target_velocity.x = EDITOR_CAMERA_MAX_VELOCITY; + } + + if (ImGui::IsMouseDragging(ImGuiMouseButton_Right)) { + auto drag = ImGui::GetMouseDragDelta(ImGuiMouseButton_Right, 0); + ImGui::ResetMouseDragDelta(ImGuiMouseButton_Right); + + auto sensitivity = 0.1f; + auto camera_rotation_degrees = glm::degrees(self.editor_camera.rotation); + camera_rotation_degrees.x += drag.x * sensitivity; + camera_rotation_degrees.y += drag.y * sensitivity; + self.editor_camera.rotation = glm::radians(camera_rotation_degrees); + } + } + + auto inv_orient = glm::conjugate(lr::Math::quat_dir(self.editor_camera.rotation)); + auto acceleration_rate = (glm::length(target_velocity) > 0.0f) ? 
EDITOR_CAMERA_ACCELERATION : EDITOR_CAMERA_DECELERATION; + self.editor_camera.velocity = glm::mix(self.editor_camera.velocity, target_velocity, glm::min(1.0f, acceleration_rate * delta_time)); + self.editor_camera.position += inv_orient * self.editor_camera.velocity * delta_time; + self.editor_camera.resolution = { window_size.x, window_size.y }; - auto projection_mat = glm::perspective( + auto projection_mat = glm::perspectiveRH_ZO( glm::radians(self.editor_camera.fov), self.editor_camera.aspect_ratio(), self.editor_camera.far_clip, @@ -204,6 +244,7 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 auto rotation_mat = glm::mat4_cast(lr::Math::quat_dir(self.editor_camera.rotation)); auto view_mat = rotation_mat * translation_mat; auto projection_view_mat = projection_mat * view_mat; + self.editor_camera.projection_mat = projection_mat; self.editor_camera.view_mat = view_mat; self.editor_camera.projection_view_mat = projection_mat * view_mat; @@ -226,7 +267,7 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 }; auto viewport_attachment = vuk::declare_ia("viewport", viewport_attachment_info); auto scene_render_info = lr::SceneRenderInfo{ - .delta_time = ImGui::GetIO().DeltaTime, + .delta_time = delta_time, .cull_flags = active_scene->get_cull_flags(), .picking_texel = requested_texel_transform, }; @@ -247,7 +288,12 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 if (selected_entity && selected_entity.has()) { auto camera_forward = glm::vec3(0.0, 0.0, 1.0) * lr::Math::quat_dir(self.editor_camera.rotation); - auto camera_projection = glm::perspective(glm::radians(self.editor_camera.fov), self.editor_camera.aspect_ratio(), self.editor_camera.far_clip, self.editor_camera.near_clip); + auto camera_projection = glm::perspective( + glm::radians(self.editor_camera.fov), + self.editor_camera.aspect_ratio(), + self.editor_camera.far_clip, + self.editor_camera.near_clip + ); 
auto camera_view = glm::lookAt(self.editor_camera.position, self.editor_camera.position + camera_forward, glm::vec3(0.0, 1.0, 0.0)); camera_projection[1][1] *= -1.0f; @@ -286,37 +332,6 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 selected_entity.modified(); } } - - // ── CAMERA CONTROLLER ─────────────────────────────────────────────── - if (!ImGuizmo::IsUsingAny() && ImGui::IsWindowHovered()) { - constexpr auto EDITOR_CAMERA_VELOCITY = 2.0f; - if (ImGui::IsKeyDown(ImGuiKey_W)) { - self.editor_camera.velocity.z = -EDITOR_CAMERA_VELOCITY; - } - - if (ImGui::IsKeyDown(ImGuiKey_S)) { - self.editor_camera.velocity.z = EDITOR_CAMERA_VELOCITY; - } - - if (ImGui::IsKeyDown(ImGuiKey_A)) { - self.editor_camera.velocity.x = -EDITOR_CAMERA_VELOCITY; - } - - if (ImGui::IsKeyDown(ImGuiKey_D)) { - self.editor_camera.velocity.x = EDITOR_CAMERA_VELOCITY; - } - - if (ImGui::IsMouseDragging(ImGuiMouseButton_Right)) { - auto drag = ImGui::GetMouseDragDelta(ImGuiMouseButton_Right, 0); - ImGui::ResetMouseDragDelta(ImGuiMouseButton_Right); - - auto sensitivity = 0.1f; - auto camera_rotation_degrees = glm::degrees(self.editor_camera.rotation); - camera_rotation_degrees.x += drag.x * sensitivity; - camera_rotation_degrees.y += drag.y * sensitivity; - self.editor_camera.rotation = glm::radians(camera_rotation_degrees); - } - } } ViewportWindow::ViewportWindow(std::string name_, bool open_) : IWindow(std::move(name_), open_) { diff --git a/Lorr/Engine/Resources/shaders/passes/tonemap.slang b/Lorr/Engine/Resources/shaders/passes/tonemap.slang index ebd01f22..791f9765 100644 --- a/Lorr/Engine/Resources/shaders/passes/tonemap.slang +++ b/Lorr/Engine/Resources/shaders/passes/tonemap.slang @@ -561,11 +561,11 @@ f32x4 fs_main(VertexOutput input) { //color = ACES_Film(color); //color = ACES_Fitted(color); //color = PBRNeutralToneMapping(color); // this looks like shit, figure out why - // color = agx_tonemapping(color); - GT7ToneMapping gt7; - 
gt7.initializeAsSDR(); - f32x3 gt7_color; - gt7.applyToneMapping(color, gt7_color); + color = agx_tonemapping(color); + //GT7ToneMapping gt7; + //gt7.initializeAsSDR(); + //f32x3 gt7_color; + //gt7.applyToneMapping(color, gt7_color); - return f32x4(gt7_color, 1.0); + return f32x4(color, 1.0); } diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 6ad8e450..e7c7e817 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -589,7 +589,7 @@ auto Scene::get_cull_flags(this Scene &self) -> GPU::CullFlags & { return self.cull_flags; } -auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, GPU::Camera fallback_camera) -> PreparedFrame { +auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option override_camera) -> PreparedFrame { ZoneScoped; auto &asset_man = App::mod(); @@ -606,30 +606,32 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, GPU::Camera .build(); // clang-format on - ls::option active_camera_data = ls::nullopt; - camera_query.each([&active_camera_data](flecs::entity, ECS::Transform &t, ECS::Camera &c, ECS::ActiveCamera) { - auto aspect_ratio = c.resolution.x / c.resolution.y; - auto projection_mat = glm::perspectiveRH_ZO(glm::radians(c.fov), aspect_ratio, c.far_clip, c.near_clip); - projection_mat[1][1] *= -1; - - auto translation_mat = glm::translate(glm::mat4(1.0f), -t.position); - auto rotation_mat = glm::mat4_cast(Math::quat_dir(t.rotation)); - auto view_mat = rotation_mat * translation_mat; - - auto &camera_data = active_camera_data.emplace(GPU::Camera{}); - camera_data.projection_mat = projection_mat; - camera_data.view_mat = view_mat; - camera_data.projection_view_mat = camera_data.projection_mat * camera_data.view_mat; - camera_data.inv_view_mat = glm::inverse(camera_data.view_mat); - camera_data.inv_projection_view_mat = glm::inverse(camera_data.projection_view_mat); - camera_data.position = t.position; - camera_data.near_clip = c.near_clip; - 
camera_data.far_clip = c.far_clip; - camera_data.resolution = c.resolution; - camera_data.acceptable_lod_error = c.acceptable_lod_error; - camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; - c.frustum_projection_view_mat = camera_data.projection_view_mat; - }); + ls::option active_camera_data = override_camera; + if (!active_camera_data.has_value()) { + camera_query.each([&active_camera_data](flecs::entity, ECS::Transform &t, ECS::Camera &c, ECS::ActiveCamera) { + auto aspect_ratio = c.resolution.x / c.resolution.y; + auto projection_mat = glm::perspectiveRH_ZO(glm::radians(c.fov), aspect_ratio, c.far_clip, c.near_clip); + projection_mat[1][1] *= -1; + + auto translation_mat = glm::translate(glm::mat4(1.0f), -t.position); + auto rotation_mat = glm::mat4_cast(Math::quat_dir(t.rotation)); + auto view_mat = rotation_mat * translation_mat; + + auto &camera_data = active_camera_data.emplace(GPU::Camera{}); + camera_data.projection_mat = projection_mat; + camera_data.view_mat = view_mat; + camera_data.projection_view_mat = camera_data.projection_mat * camera_data.view_mat; + camera_data.inv_view_mat = glm::inverse(camera_data.view_mat); + camera_data.inv_projection_view_mat = glm::inverse(camera_data.projection_view_mat); + camera_data.position = t.position; + camera_data.near_clip = c.near_clip; + camera_data.far_clip = c.far_clip; + camera_data.resolution = c.resolution; + camera_data.acceptable_lod_error = c.acceptable_lod_error; + camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; + c.frustum_projection_view_mat = camera_data.projection_view_mat; + }); + } GPU::Environment environment = {}; environment_query.each([&environment](flecs::entity, ECS::Environment &environment_comp) { @@ -769,7 +771,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, GPU::Camera .gpu_meshes = gpu_meshes, .gpu_mesh_instances = gpu_mesh_instances, .environment = environment, - .camera = 
active_camera_data.value_or(fallback_camera), + .camera = active_camera_data.value_or(GPU::Camera{}), }; auto prepared_frame = renderer.prepare_frame(prepare_info); diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index bcc1d6ef..02f00ce5 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -79,7 +79,7 @@ public: auto find_entity(this Scene &, u32 transform_index) -> flecs::entity; // If we really want to render something, camera needs to be there - auto prepare_frame(this Scene &, SceneRenderer &renderer, GPU::Camera fallback_camera = {}) -> PreparedFrame; + auto prepare_frame(this Scene &, SceneRenderer &renderer, ls::option override_camera = ls::nullopt) -> PreparedFrame; auto tick(this Scene &, f32 delta_time) -> bool; auto set_name(this Scene &, const std::string &name) -> void; From 6f5067e219fba96ee1b388bed6265b8c510403be Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 19 Aug 2025 20:22:12 +0300 Subject: [PATCH 07/27] add hiz fallback --- Lorr/Editor/Window/ViewportWindow.cc | 52 +++++++------------ Lorr/Engine/Resources/shaders/cull.slang | 2 +- .../Engine/Resources/shaders/passes/hiz.slang | 8 +-- .../Resources/shaders/passes/hiz_slow.slang | 21 ++++++++ Lorr/Engine/Scene/EditorCamera.cc | 28 ++++++++++ Lorr/Engine/Scene/EditorCamera.hh | 5 ++ Lorr/Engine/Scene/SceneRenderer.cc | 46 ++++++++++++++-- 7 files changed, 120 insertions(+), 42 deletions(-) create mode 100644 Lorr/Engine/Resources/shaders/passes/hiz_slow.slang create mode 100644 Lorr/Engine/Scene/EditorCamera.cc diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index 3af8b0d0..37f6480b 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -140,16 +140,21 @@ static auto draw_tools(ViewportWindow &self) -> void { ImGui::SeparatorText("Far Clip"); ImGui::drag_vec(3, &self.editor_camera.far_clip, 1, ImGuiDataType_Float); + 
ImGui::SeparatorText("Max Velocity"); + ImGui::drag_vec(4, &self.editor_camera.max_velocity, 1, ImGuiDataType_Float); + ImGui::EndPopup(); } if (editor.show_debug) { auto &cull_flags = reinterpret_cast(active_scene->get_cull_flags()); + auto &scene_renderer = lr::App::mod(); + ImGui::CheckboxFlags("Cull Meshlet Frustum", &cull_flags, std::to_underlying(lr::GPU::CullFlags::MeshletFrustum)); ImGui::CheckboxFlags("Cull Triangle Back Face", &cull_flags, std::to_underlying(lr::GPU::CullFlags::TriangleBackFace)); ImGui::CheckboxFlags("Cull Micro Triangles", &cull_flags, std::to_underlying(lr::GPU::CullFlags::MicroTriangles)); ImGui::CheckboxFlags("Cull Occlusion", &cull_flags, std::to_underlying(lr::GPU::CullFlags::Occlusion)); - // ImGui::Checkbox("Debug Lines", &editor.scene_renderer.debug_lines); + ImGui::Checkbox("Debug Lines", &scene_renderer.debug_lines); } } @@ -191,27 +196,31 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 } { - constexpr auto EDITOR_CAMERA_MAX_VELOCITY = 2.0f; - constexpr auto EDITOR_CAMERA_ACCELERATION = 8.0f; - constexpr auto EDITOR_CAMERA_DECELERATION = 12.0f; - // Update editor camera auto target_velocity = glm::vec3(0.0f); if (!ImGuizmo::IsUsingAny() && ImGui::IsWindowHovered()) { if (ImGui::IsKeyDown(ImGuiKey_W)) { - target_velocity.z = -EDITOR_CAMERA_MAX_VELOCITY; + target_velocity.z = -self.editor_camera.max_velocity; } if (ImGui::IsKeyDown(ImGuiKey_S)) { - target_velocity.z = EDITOR_CAMERA_MAX_VELOCITY; + target_velocity.z = self.editor_camera.max_velocity; } if (ImGui::IsKeyDown(ImGuiKey_A)) { - target_velocity.x = -EDITOR_CAMERA_MAX_VELOCITY; + target_velocity.x = -self.editor_camera.max_velocity; } if (ImGui::IsKeyDown(ImGuiKey_D)) { - target_velocity.x = EDITOR_CAMERA_MAX_VELOCITY; + target_velocity.x = self.editor_camera.max_velocity; + } + + if (ImGui::IsKeyDown(ImGuiKey_E)) { + target_velocity.y = self.editor_camera.max_velocity; + } + + if (ImGui::IsKeyDown(ImGuiKey_Q)) { + target_velocity.y = 
-self.editor_camera.max_velocity; } if (ImGui::IsMouseDragging(ImGuiMouseButton_Right)) { @@ -226,31 +235,8 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 } } - auto inv_orient = glm::conjugate(lr::Math::quat_dir(self.editor_camera.rotation)); - auto acceleration_rate = (glm::length(target_velocity) > 0.0f) ? EDITOR_CAMERA_ACCELERATION : EDITOR_CAMERA_DECELERATION; - self.editor_camera.velocity = glm::mix(self.editor_camera.velocity, target_velocity, glm::min(1.0f, acceleration_rate * delta_time)); - self.editor_camera.position += inv_orient * self.editor_camera.velocity * delta_time; - self.editor_camera.resolution = { window_size.x, window_size.y }; - auto projection_mat = glm::perspectiveRH_ZO( - glm::radians(self.editor_camera.fov), - self.editor_camera.aspect_ratio(), - self.editor_camera.far_clip, - self.editor_camera.near_clip - ); - projection_mat[1][1] *= -1.0f; - - auto translation_mat = glm::translate(glm::mat4(1.0f), -self.editor_camera.position); - auto rotation_mat = glm::mat4_cast(lr::Math::quat_dir(self.editor_camera.rotation)); - auto view_mat = rotation_mat * translation_mat; - auto projection_view_mat = projection_mat * view_mat; - - self.editor_camera.projection_mat = projection_mat; - self.editor_camera.view_mat = view_mat; - self.editor_camera.projection_view_mat = projection_mat * view_mat; - self.editor_camera.inv_view_mat = glm::inverse(view_mat); - self.editor_camera.inv_projection_view_mat = glm::inverse(projection_view_mat); - self.editor_camera.acceptable_lod_error = 2.0f; + self.editor_camera.update(delta_time, target_velocity); } auto prepared_frame = active_scene->prepare_frame(scene_renderer, self.editor_camera); // NOLINT(cppcoreguidelines-slicing) diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index b84547a7..315f3a09 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -101,7 +101,7 @@ public 
func test_occlusion( let size = max_texel - min_texel + 1; let max_size = max(size.x, size.y); - var mip = firstbithigh(max_size - 1) - 1; + var mip = firstbithigh(max_size - 1); let smin = min_texel >> mip; let smax = max_texel >> mip; if (any(smax - smin > 1)) { diff --git a/Lorr/Engine/Resources/shaders/passes/hiz.slang b/Lorr/Engine/Resources/shaders/passes/hiz.slang index 2f21880d..0d166b06 100644 --- a/Lorr/Engine/Resources/shaders/passes/hiz.slang +++ b/Lorr/Engine/Resources/shaders/passes/hiz.slang @@ -3,10 +3,10 @@ module hiz; import std; import gpu; -// Do not remove this comment: -// Taken from: https://github.dev/SparkyPotato/radiance/blob/main/shaders/passes/mesh/hzb.slang, -// which is based on https://github.com/Themaister/Granite/blob/master/assets/shaders/post/hiz.comp, -// which is HiZ modification of AMD's Single Pass Downsampler. +// Credits: +// - https://github.dev/SparkyPotato/radiance/blob/main/shaders/passes/mesh/hzb.slang, +// - https://github.com/Themaister/Granite/blob/master/assets/shaders/post/hiz.comp, +// - AMD's Single Pass Downsampler. 
[[vk::binding(0)]] globallycoherent RWStructuredBuffer spd_global_atomic; diff --git a/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang b/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang new file mode 100644 index 00000000..efafd6cc --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang @@ -0,0 +1,21 @@ +module hiz; + +import std; +import gpu; + +struct ShaderParameters { + Sampler sampler; + Image2D src_image; + StorageImage2D dst_mip; +} + +[[shader("compute")]] +[[numthreads(32, 32, 1)]] +func cs_main( + u32x2 thread_id : SV_DispatchThreadID, + uniform ParameterBlock params, + uniform u32x2 src_image_size +) -> void { + let c = params.src_image.sample(params.sampler, (f32x2(thread_id) + 0.5) / f32x2(src_image_size)).r; + params.dst_mip.store(thread_id.xy, c); +} diff --git a/Lorr/Engine/Scene/EditorCamera.cc b/Lorr/Engine/Scene/EditorCamera.cc new file mode 100644 index 00000000..26956765 --- /dev/null +++ b/Lorr/Engine/Scene/EditorCamera.cc @@ -0,0 +1,28 @@ +#include "Engine/Scene/EditorCamera.hh" + +namespace lr { +auto EditorCamera::update(this EditorCamera &self, f32 delta_time, const glm::vec3 &target_velocity) -> void { + ZoneScoped; + + auto inv_orient = glm::conjugate(lr::Math::quat_dir(self.rotation)); + auto acceleration_rate = glm::length(target_velocity) > 0.0f ? 
self.accel_speed : self.decel_speed; + self.velocity = glm::mix(self.velocity, target_velocity, glm::min(1.0f, acceleration_rate * delta_time)); + self.position += inv_orient * self.velocity * delta_time; + + auto projection_mat = glm::perspectiveRH_ZO(glm::radians(self.fov), self.aspect_ratio(), self.far_clip, self.near_clip); + projection_mat[1][1] *= -1.0f; + + auto translation_mat = glm::translate(glm::mat4(1.0f), -self.position); + auto rotation_mat = glm::mat4_cast(lr::Math::quat_dir(self.rotation)); + auto view_mat = rotation_mat * translation_mat; + auto projection_view_mat = projection_mat * view_mat; + + self.frustum_projection_view_mat = self.projection_view_mat; + self.projection_mat = projection_mat; + self.view_mat = view_mat; + self.projection_view_mat = projection_mat * view_mat; + self.inv_view_mat = glm::inverse(view_mat); + self.inv_projection_view_mat = glm::inverse(projection_view_mat); + self.acceptable_lod_error = 2.0f; +} +} // namespace lr diff --git a/Lorr/Engine/Scene/EditorCamera.hh b/Lorr/Engine/Scene/EditorCamera.hh index 767e28be..990ea5cb 100644 --- a/Lorr/Engine/Scene/EditorCamera.hh +++ b/Lorr/Engine/Scene/EditorCamera.hh @@ -7,6 +7,11 @@ struct EditorCamera : GPU::Camera { glm::vec3 rotation = {}; glm::vec3 velocity = {}; f32 fov = 65.0f; + f32 max_velocity = 2.0f; + f32 accel_speed = 8.0f; + f32 decel_speed = 12.0f; + + auto update(this EditorCamera &, f32 delta_time, const glm::vec3 &target_velocity) -> void; auto aspect_ratio() -> f32 { return resolution.x / resolution.y; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 3ffc6a49..b7bf668d 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -180,6 +180,12 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { }; Pipeline::create(device, default_slang_session, hiz_pipeline_info).value(); + auto hiz_slow_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.hiz_slow", + .entry_points 
= { "cs_main" }, + }; + Pipeline::create(device, default_slang_session, hiz_slow_pipeline_info).value(); + self.histogram_luminance_buffer = Buffer::create(device, sizeof(GPU::HistogramLuminance)).value(); vuk::fill(vuk::acquire_buf("histogram luminance", *device.buffer(self.histogram_luminance_buffer.id()), vuk::eNone), 0); @@ -507,7 +513,7 @@ static auto draw_sky( auto sky_final_pass = vuk::make_pass( "sky final", [](vuk::CommandBuffer &cmd_list, // - VUK_IA(vuk::eColorWrite) dst, + VUK_IA(vuk::eColorRW) dst, VUK_IA(vuk::eFragmentSampled) sky_transmittance_lut, VUK_IA(vuk::eFragmentSampled) sky_aerial_perspective_lut, VUK_IA(vuk::eFragmentSampled) sky_view_lut, @@ -1071,6 +1077,38 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valueextent; + auto mip_count = dst->level_count; + + cmd_list // + .bind_compute_pipeline("passes.hiz_slow") + .bind_sampler(0, 0, hiz_sampler_info); + + for (auto i = 0_u32; i < mip_count; i++) { + auto mip_width = std::max(1_u32, extent.width >> i); + auto mip_height = std::max(1_u32, extent.height >> i); + + auto mip = dst->mip(i); + if (i == 0) { + cmd_list.bind_image(0, 1, src); + } else { + cmd_list.bind_image(0, 1, dst->mip(i - 1)); + } + + cmd_list.bind_image(0, 2, mip); + cmd_list.push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mip_width, mip_height)); + cmd_list.dispatch_invocations(mip_width, mip_height); + } + + return std::make_tuple(src, dst); + } + ); + std::tie(depth_attachment, hiz_attachment) = hiz_generate_pass(std::move(depth_attachment), std::move(hiz_attachment)); // ── VISBUFFER DECODE ──────────────────────────────────────────────── @@ -1192,7 +1230,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value Date: Wed, 20 Aug 2025 21:48:04 +0300 Subject: [PATCH 08/27] runtime mouse movement --- Lorr/Editor/Window/InspectorWindow.cc | 79 +++++++++---------- Lorr/Editor/Window/ViewportWindow.cc | 2 +- Lorr/Engine/Graphics/Vulkan/Device.cc | 10 ++- Lorr/Engine/Math/Quat.hh | 4 
+- Lorr/Engine/Scene/ECSModule/CoreComponents.hh | 9 ++- Lorr/Engine/Scene/Scene.cc | 69 +++++++--------- Lorr/Engine/Scene/Scene.hh | 29 +++---- Lorr/Engine/Window/Window.cc | 26 +++++- Lorr/Engine/Window/Window.hh | 14 +++- Lorr/Runtime/RuntimeModule.cc | 43 +++++++++- 10 files changed, 174 insertions(+), 111 deletions(-) diff --git a/Lorr/Editor/Window/InspectorWindow.cc b/Lorr/Editor/Window/InspectorWindow.cc index 80b85782..228fbbf6 100755 --- a/Lorr/Editor/Window/InspectorWindow.cc +++ b/Lorr/Editor/Window/InspectorWindow.cc @@ -80,10 +80,6 @@ static auto draw_inspector(InspectorWindow &) -> void { } lr::ECS::ComponentWrapper component(selected_entity, component_id); - if (!component.is_component()) { - return; - } - auto name_with_icon = stack.format_char("{}", component.name); ImGui::PushID(static_cast(component_id)); if (ImGui::CollapsingHeader(name_with_icon, nullptr, ImGuiTreeNodeFlags_DefaultOpen)) { @@ -95,43 +91,45 @@ static auto draw_inspector(InspectorWindow &) -> void { ImGui::TableSetupColumn("label", 0, 0.5f); ImGui::TableSetupColumn("property", ImGuiTableColumnFlags_WidthStretch); - ImGui::PushID(component.members_data); - component.for_each([&](usize &i, std::string_view member_name, lr::ECS::ComponentWrapper::Member &member) { - // Draw prop label - ImGui::TableNextRow(); - ImGui::TableNextColumn(); - - ImGui::SetCursorPosY(ImGui::GetCursorPosY() + ImGui::GetStyle().FramePadding.y); - ImGui::TextUnformatted(member_name.data(), member_name.data() + member_name.length()); - ImGui::TableNextColumn(); - - bool component_modified = false; - ImGui::PushID(static_cast(i)); - std::visit( - ls::match{ - [](const auto &) {}, - [&](bool *v) { component_modified |= ImGui::Checkbox("", v); }, - [&](f32 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_Float); }, - [&](i32 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_S32); }, - [&](u32 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_U32); }, - 
[&](i64 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_S64); }, - [&](u64 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_U64); }, - [&](glm::vec2 *v) { component_modified |= ImGui::drag_vec(0, glm::value_ptr(*v), 2, ImGuiDataType_Float); }, - [&](glm::vec3 *v) { component_modified |= ImGui::drag_vec(0, glm::value_ptr(*v), 3, ImGuiDataType_Float); }, - [&](glm::vec4 *v) { component_modified |= ImGui::drag_vec(0, glm::value_ptr(*v), 4, ImGuiDataType_Float); }, - [](std::string *v) { ImGui::InputText("", v); }, - [&](lr::UUID *v) { component_modified |= inspect_asset(*v); }, - }, - member - ); + if (component.is_component()) { + ImGui::PushID(component.members_data); + component.for_each([&](usize &i, std::string_view member_name, lr::ECS::ComponentWrapper::Member &member) { + // Draw prop label + ImGui::TableNextRow(); + ImGui::TableNextColumn(); + + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + ImGui::GetStyle().FramePadding.y); + ImGui::TextUnformatted(member_name.data(), member_name.data() + member_name.length()); + ImGui::TableNextColumn(); + + bool component_modified = false; + ImGui::PushID(static_cast(i)); + std::visit( + ls::match{ + [](const auto &) {}, + [&](bool *v) { component_modified |= ImGui::Checkbox("", v); }, + [&](f32 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_Float); }, + [&](i32 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_S32); }, + [&](u32 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_U32); }, + [&](i64 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_S64); }, + [&](u64 *v) { component_modified |= ImGui::drag_vec(0, v, 1, ImGuiDataType_U64); }, + [&](glm::vec2 *v) { component_modified |= ImGui::drag_vec(0, glm::value_ptr(*v), 2, ImGuiDataType_Float); }, + [&](glm::vec3 *v) { component_modified |= ImGui::drag_vec(0, glm::value_ptr(*v), 3, ImGuiDataType_Float); }, + [&](glm::vec4 *v) { component_modified |= 
ImGui::drag_vec(0, glm::value_ptr(*v), 4, ImGuiDataType_Float); }, + [](std::string *v) { ImGui::InputText("", v); }, + [&](lr::UUID *v) { component_modified |= inspect_asset(*v); }, + }, + member + ); + ImGui::PopID(); + + if (component_modified) { + selected_entity.modified(component_id.entity()); + } + }); ImGui::PopID(); + } - if (component_modified) { - selected_entity.modified(component_id.entity()); - } - }); - - ImGui::PopID(); ImGui::EndTable(); if (ImGui::Button("Remove Component", ImVec2(region.x, 0))) { @@ -150,8 +148,7 @@ static auto draw_inspector(InspectorWindow &) -> void { ImGui::SetNextWindowPos(ImGui::GetCursorScreenPos()); ImGui::SetNextWindowSize({ region.x, 0 }); if (ImGui::BeginPopup("add_component")) { - auto &entity_db = active_scene->get_entity_db(); - auto all_components = entity_db.get_components(); + auto all_components = active_scene->get_known_component_ids(); for (const auto &component : all_components) { // lr::memory::ScopedStack stack; ImGui::PushID(static_cast(component.raw_id())); diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index 37f6480b..fa401330 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -285,7 +285,7 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 auto *transform = selected_entity.get_mut(); auto T = glm::translate(glm::mat4(1.0), transform->position); - auto R = glm::mat4_cast(glm::quat(transform->rotation)); + auto R = glm::mat4_cast(lr::Math::quat_dir(transform->rotation)); auto S = glm::scale(glm::mat4(1.0), transform->scale); auto gizmo_mat = T * R * S; auto delta_mat = glm::mat4(1.0f); diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 2b4fb0f2..66608499 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -497,7 +497,6 @@ auto Device::create_swap_chain(this Device &self, VkSurfaceKHR surface, 
ls::opti -> std::expected { ZoneScoped; - VkPresentModeKHR present_mode = self.frame_count() == 1 ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR; vkb::SwapchainBuilder builder(self.handle, surface); builder.set_desired_min_image_count(self.frame_count()); builder.set_desired_format( @@ -512,7 +511,14 @@ auto Device::create_swap_chain(this Device &self, VkSurfaceKHR surface, ls::opti .colorSpace = vuk::ColorSpaceKHR::eSrgbNonlinear, } ); - builder.set_desired_present_mode(present_mode); + + if (self.frame_count() != 1) { + builder.set_desired_present_mode(VK_PRESENT_MODE_MAILBOX_KHR); + builder.add_fallback_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR); + } else { + builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR); + } + builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_KHR); builder.set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); auto recycling = false; diff --git a/Lorr/Engine/Math/Quat.hh b/Lorr/Engine/Math/Quat.hh index ce2c2cc4..3a8dff31 100644 --- a/Lorr/Engine/Math/Quat.hh +++ b/Lorr/Engine/Math/Quat.hh @@ -8,8 +8,8 @@ inline auto quat_dir(const glm::vec3 &rotation) -> glm::quat { ZoneScoped; glm::quat orientation = {}; - orientation = glm::angleAxis(rotation.x, glm::vec3(0.0f, 1.0f, 0.0f)); - orientation = glm::angleAxis(rotation.y, glm::vec3(1.0f, 0.0f, 0.0f)) * orientation; + orientation = glm::angleAxis(rotation.x, glm::vec3(1.0f, 0.0f, 0.0f)); + orientation = glm::angleAxis(rotation.y, glm::vec3(0.0f, 1.0f, 0.0f)) * orientation; orientation = glm::angleAxis(rotation.z, glm::vec3(0.0f, 0.0f, 1.0f)) * orientation; return glm::normalize(orientation); } diff --git a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh index 80e1137a..5d21439a 100644 --- a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh +++ b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh @@ -22,12 +22,17 @@ ECS_COMPONENT_BEGIN(Transform) ECS_COMPONENT_END(); 
ECS_COMPONENT_BEGIN(Camera) + ECS_COMPONENT_MEMBER(position, glm::vec3, {0.0f, 0.0f, 0.0f}) + ECS_COMPONENT_MEMBER(yaw, f32, 0.0f) + ECS_COMPONENT_MEMBER(pitch, f32, 0.0f) ECS_COMPONENT_MEMBER(fov, f32, 90.0f) ECS_COMPONENT_MEMBER(resolution, glm::vec2, {}) ECS_COMPONENT_MEMBER(near_clip, f32, 0.1f) ECS_COMPONENT_MEMBER(far_clip, f32, 1000.0f) - ECS_COMPONENT_MEMBER(axis_velocity, glm::vec3, { 0.0, 0.0, 0.0 }) - ECS_COMPONENT_MEMBER(velocity_mul, f32, 1.0) + ECS_COMPONENT_MEMBER(velocity, glm::vec3, { 0.0, 0.0, 0.0 }) + ECS_COMPONENT_MEMBER(max_velocity, f32, 1.0f) + ECS_COMPONENT_MEMBER(accel_speed, f32, 1.0f) + ECS_COMPONENT_MEMBER(decel_speed, f32, 1.0f) ECS_COMPONENT_MEMBER(frustum_projection_view_mat, glm::mat4, glm::mat4(1.0)) ECS_COMPONENT_MEMBER(acceptable_lod_error, f32, 2.0f) ECS_COMPONENT_END(); diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index e7c7e817..31e7497f 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -40,29 +40,12 @@ bool json_to_quat(simdjson::ondemand::value &o, glm::quat &quat) { return true; } -auto SceneEntityDB::import_module(this SceneEntityDB &self, flecs::entity module) -> void { - ZoneScoped; - - self.imported_modules.emplace_back(module); - module.children([&](flecs::id id) { self.components.push_back(id); }); -} - -auto SceneEntityDB::is_component_known(this SceneEntityDB &self, flecs::id component_id) -> bool { - ZoneScoped; - - return std::ranges::any_of(self.components, [&](const auto &id) { return id == component_id; }); -} - -auto SceneEntityDB::get_components(this SceneEntityDB &self) -> ls::span { - return self.components; -} - auto Scene::init(this Scene &self, const std::string &name) -> bool { ZoneScoped; self.name = name; self.world.emplace(); - self.entity_db.import_module(self.world->import ()); + self.import_module(); self.world->observer() .event(flecs::OnSet) @@ -98,15 +81,6 @@ auto Scene::init(this Scene &self, const std::string &name) -> bool { } }); - self.world 
- ->system() // - .each([&](flecs::iter &it, usize, ECS::Transform &t, ECS::Camera &c) { - auto inv_orient = glm::conjugate(Math::quat_dir(t.rotation)); - t.position += glm::vec3(inv_orient * c.axis_velocity * it.delta_time()); - - c.axis_velocity = {}; - }); - self.root = self.world->entity(); self.root.add(); @@ -193,7 +167,7 @@ static auto json_to_entity(Scene &self, flecs::entity root, simdjson::ondemand:: continue; } - LS_EXPECT(self.get_entity_db().is_component_known(component_id)); + LS_EXPECT(self.is_component_known(component_id)); e.add(component_id); ECS::ComponentWrapper component(e, component_id); @@ -245,6 +219,18 @@ static auto json_to_entity(Scene &self, flecs::entity root, simdjson::ondemand:: return true; } +auto Scene::import_module(this Scene &self, flecs::entity module_entity) -> void { + ZoneScoped; + + module_entity.children([&](flecs::id id) { self.known_component_ids.push_back(id); }); +} + +auto Scene::is_component_known(this Scene &self, flecs::id component_id) -> bool { + ZoneScoped; + + return std::ranges::any_of(self.known_component_ids, [&](const auto &id) { return id == component_id; }); +} + auto Scene::import_from_file(this Scene &self, const fs::path &path) -> bool { ZoneScoped; memory::ScopedStack stack; @@ -406,15 +392,14 @@ auto Scene::delete_entity(this Scene &, flecs::entity entity) -> void { entity.destruct(); } -auto Scene::create_perspective_camera(this Scene &self, const std::string &name, const glm::vec3 &position, const glm::vec3 &rotation, f32 fov) +auto Scene::create_perspective_camera(this Scene &self, const std::string &name, const glm::vec3 &position, f32 yaw, f32 pitch, f32 fov) -> flecs::entity { ZoneScoped; return self .create_entity(name) // .add() - .set({ .position = position, .rotation = glm::radians(Math::normalize_180(rotation)) }) - .set({ .fov = fov }) + .set({ .position = position, .yaw = yaw, .pitch = pitch, .fov = fov }) .child_of(self.root); } @@ -522,7 +507,7 @@ auto Scene::set_dirty(this Scene 
&self, flecs::entity entity) -> void { const auto *entity_transform = cur_entity.get(); const auto T = glm::translate(glm::mat4(1.0), entity_transform->position); - const auto R = glm::mat4_cast(glm::quat(entity_transform->rotation)); + const auto R = glm::mat4_cast(Math::quat_dir(entity_transform->rotation)); const auto S = glm::scale(glm::mat4(1.0), entity_transform->scale); auto local_mat = T * R * S; auto world_mat = local_mat; @@ -581,10 +566,6 @@ auto Scene::get_name_sv(this Scene &self) -> std::string_view { return self.name; } -auto Scene::get_entity_db(this Scene &self) -> SceneEntityDB & { - return self.entity_db; -} - auto Scene::get_cull_flags(this Scene &self) -> GPU::CullFlags & { return self.cull_flags; } @@ -596,7 +577,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< // clang-format off auto camera_query = self.get_world() - .query_builder() + .query_builder() .build(); auto rendering_meshes_query = self.get_world() .query_builder() @@ -608,14 +589,18 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< ls::option active_camera_data = override_camera; if (!active_camera_data.has_value()) { - camera_query.each([&active_camera_data](flecs::entity, ECS::Transform &t, ECS::Camera &c, ECS::ActiveCamera) { + camera_query.each([&active_camera_data](flecs::entity, ECS::Camera &c, ECS::ActiveCamera) { auto aspect_ratio = c.resolution.x / c.resolution.y; auto projection_mat = glm::perspectiveRH_ZO(glm::radians(c.fov), aspect_ratio, c.far_clip, c.near_clip); projection_mat[1][1] *= -1; - auto translation_mat = glm::translate(glm::mat4(1.0f), -t.position); - auto rotation_mat = glm::mat4_cast(Math::quat_dir(t.rotation)); - auto view_mat = rotation_mat * translation_mat; + auto direction = glm::vec3( + glm::cos(glm::radians(c.yaw)) * glm::cos(glm::radians(c.pitch)), + glm::sin(glm::radians(c.pitch)), + glm::sin(glm::radians(c.yaw)) * glm::cos(glm::radians(c.pitch)) + ); + direction = 
glm::normalize(direction); + auto view_mat = glm::lookAt(c.position, c.position + direction, glm::vec3(0.0f, 1.0f, 0.0f)); auto &camera_data = active_camera_data.emplace(GPU::Camera{}); camera_data.projection_mat = projection_mat; @@ -623,7 +608,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< camera_data.projection_view_mat = camera_data.projection_mat * camera_data.view_mat; camera_data.inv_view_mat = glm::inverse(camera_data.view_mat); camera_data.inv_projection_view_mat = glm::inverse(camera_data.projection_view_mat); - camera_data.position = t.position; + camera_data.position = c.position; camera_data.near_clip = c.near_clip; camera_data.far_clip = c.far_clip; camera_data.resolution = c.resolution; diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index 02f00ce5..59c9c3b8 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -25,16 +25,6 @@ struct ankerl::unordered_dense::hash { }; namespace lr { -struct SceneEntityDB { - ankerl::unordered_dense::map component_icons = {}; - std::vector components = {}; - std::vector imported_modules = {}; - - auto import_module(this SceneEntityDB &, flecs::entity module) -> void; - auto is_component_known(this SceneEntityDB &, flecs::id component_id) -> bool; - auto get_components(this SceneEntityDB &) -> ls::span; -}; - struct AssetManager; enum class SceneID : u64 { Invalid = ~0_u64 }; struct Scene { @@ -42,7 +32,7 @@ private: std::string name = {}; flecs::entity root = {}; ls::option world = ls::nullopt; - SceneEntityDB entity_db = {}; + std::vector known_component_ids = {}; SlotMap transforms = {}; ankerl::unordered_dense::map entity_transforms_map = {}; @@ -61,13 +51,21 @@ public: auto init(this Scene &, const std::string &name) -> bool; auto destroy(this Scene &) -> void; + template + auto import_module(this Scene &self) -> void { + ZoneScoped; + + return self.import_module(self.world->import ()); + } + auto import_module(this Scene &, 
flecs::entity module_entity) -> void; + auto is_component_known(this Scene &, flecs::id component_id) -> bool; + auto import_from_file(this Scene &, const fs::path &path) -> bool; auto export_to_file(this Scene &, const fs::path &path) -> bool; auto create_entity(this Scene &, const std::string &name = {}) -> flecs::entity; auto delete_entity(this Scene &, flecs::entity entity) -> void; - auto create_perspective_camera(this Scene &, const std::string &name, const glm::vec3 &position, const glm::vec3 &rotation, f32 fov) - -> flecs::entity; + auto create_perspective_camera(this Scene &, const std::string &name, const glm::vec3 &position, f32 yaw, f32 pitch, f32 fov) -> flecs::entity; // Model = collection of meshes. // This function imports every mesh inside the model asset. // The returning entity is a parent, "model" entity where each of @@ -89,9 +87,12 @@ public: auto get_world(this Scene &) -> flecs::world &; auto get_name(this Scene &) -> const std::string &; auto get_name_sv(this Scene &) -> std::string_view; - auto get_entity_db(this Scene &) -> SceneEntityDB &; auto get_cull_flags(this Scene &) -> GPU::CullFlags &; + auto get_known_component_ids(this Scene &self) -> auto { + return ls::span(self.known_component_ids); + } + private: auto add_transform(this Scene &, flecs::entity entity) -> GPU::TransformID; auto remove_transform(this Scene &, flecs::entity entity) -> void; diff --git a/Lorr/Engine/Window/Window.cc b/Lorr/Engine/Window/Window.cc index 0ca8fe55..d2e362f6 100644 --- a/Lorr/Engine/Window/Window.cc +++ b/Lorr/Engine/Window/Window.cc @@ -113,6 +113,7 @@ auto Window::init(this Window &self) -> bool { SDL_GetWindowSizeInPixels(self.handle, &self.width, &self.height); SDL_StartTextInput(self.handle); + self.cursors = { SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_DEFAULT), SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_TEXT), SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_MOVE), SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_NS_RESIZE), @@ -141,6 +142,7 @@ auto 
Window::destroy(this Window &self) -> void { auto Window::update(this Window &self, f64) -> void { ZoneScoped; + self.mouse_moved = false; SDL_Event e = {}; while (SDL_PollEvent(&e) != 0) { switch (e.type) { @@ -158,10 +160,15 @@ auto Window::update(this Window &self, f64) -> void { auto state = KeyState::Up; state |= e.key.down ? KeyState::Down : KeyState::Up; state |= e.key.repeat ? KeyState::Repeat : KeyState::Up; - self.key_events.try_emplace(e.key.scancode, e.key.key, e.key.mod, state); + self.key_events[e.key.scancode] = { .key = e.key.key, .mod = e.key.mod, .state = state }; } break; case SDL_EVENT_KEY_UP: { - self.key_events.try_emplace(e.key.scancode, e.key.key, e.key.mod, KeyState::Up); + self.key_events[e.key.scancode] = { .key = e.key.key, .mod = e.key.mod, .state = KeyState::Up }; + } break; + case SDL_EVENT_MOUSE_MOTION: { + self.mouse_pos_delta = glm::vec2(e.motion.xrel, e.motion.yrel); + self.mouse_pos = glm::vec2(e.motion.x, e.motion.y); + self.mouse_moved = true; } break; case SDL_EVENT_QUIT: { App::close(); @@ -175,6 +182,21 @@ auto Window::update(this Window &self, f64) -> void { } } +auto Window::check_key_state(this Window &self, SDL_Scancode scancode, KeyState state) -> bool { + auto it = self.key_events.find(scancode); + if (it == self.key_events.end()) { + return state == KeyState::Up; + } + + return it->second.state & state; +} + +auto Window::set_relative_mouse(this Window &self, bool enabled) -> void { + ZoneScoped; + + SDL_SetWindowRelativeMouseMode(self.handle, enabled); +} + auto Window::set_cursor(this Window &self, WindowCursor cursor) -> void { ZoneScoped; diff --git a/Lorr/Engine/Window/Window.hh b/Lorr/Engine/Window/Window.hh index 945281d6..50bed01c 100644 --- a/Lorr/Engine/Window/Window.hh +++ b/Lorr/Engine/Window/Window.hh @@ -101,6 +101,9 @@ struct Window { std::array cursors = {}; std::vector> event_listeners = {}; ankerl::unordered_dense::map key_events = {}; + glm::vec2 mouse_pos = {}; + glm::vec2 mouse_pos_delta = {}; + 
bool mouse_moved = false; static auto init_sdl() -> bool; static auto display_at(i32 monitor_id) -> ls::option; @@ -110,15 +113,18 @@ struct Window { auto destroy(this Window &) -> void; auto update(this Window &, f64) -> void; - auto set_cursor(this Window &, WindowCursor cursor) -> void; - auto get_cursor(this Window &) -> WindowCursor; - auto show_cursor(this Window &, bool show) -> void; - template auto add_listener(T &listener) { event_listeners.push_back([&listener](SDL_Event &e) { listener.window_event(e); }); } + auto check_key_state(this Window &, SDL_Scancode scancode, KeyState state) -> bool; + + auto set_relative_mouse(this Window &, bool enabled) -> void; + auto set_cursor(this Window &, WindowCursor cursor) -> void; + auto get_cursor(this Window &) -> WindowCursor; + auto show_cursor(this Window &, bool show) -> void; + auto get_size(this Window &) -> glm::ivec2; auto get_surface(this Window &, VkInstance instance) -> VkSurfaceKHR; auto get_handle(this Window &) -> void *; diff --git a/Lorr/Runtime/RuntimeModule.cc b/Lorr/Runtime/RuntimeModule.cc index 74e3e64c..1e11c528 100644 --- a/Lorr/Runtime/RuntimeModule.cc +++ b/Lorr/Runtime/RuntimeModule.cc @@ -9,7 +9,44 @@ struct Runtime { Runtime(flecs::world &world) { - world.component(""); + auto &window = lr::App::mod(); + world + .system() // + .each([&](flecs::iter &it, usize, lr::ECS::Camera &c, lr::ECS::ActiveCamera) { + auto target_velocity = glm::vec3(0.0f); + if (window.check_key_state(SDL_SCANCODE_W, lr::KeyState::Down)) { + target_velocity.z = -c.max_velocity; + } + + if (window.check_key_state(SDL_SCANCODE_S, lr::KeyState::Down)) { + target_velocity.z = c.max_velocity; + } + + if (window.check_key_state(SDL_SCANCODE_A, lr::KeyState::Down)) { + target_velocity.x = -c.max_velocity; + } + + if (window.check_key_state(SDL_SCANCODE_D, lr::KeyState::Down)) { + target_velocity.x = c.max_velocity; + } + + if (window.check_key_state(SDL_SCANCODE_E, lr::KeyState::Down)) { + target_velocity.y = 
c.max_velocity; + } + + if (window.check_key_state(SDL_SCANCODE_Q, lr::KeyState::Down)) { + target_velocity.y = -c.max_velocity; + } + + if (window.mouse_moved) { + auto sensitivity = 0.2f; + c.yaw += window.mouse_pos_delta.x * sensitivity; + c.pitch = glm::clamp(c.pitch - window.mouse_pos_delta.y * sensitivity, -89.9f, 89.9f); + } + + auto acceleration_rate = glm::length(target_velocity) > 0.0f ? c.accel_speed : c.decel_speed; + c.velocity = glm::mix(c.velocity, target_velocity, glm::min(1.0f, acceleration_rate * it.delta_time())); + }); } }; @@ -64,10 +101,14 @@ auto RuntimeModule::update(this RuntimeModule &self, f64 delta_time) -> void { const auto &path_str = asset.path.string(); if (ImGui::Button(path_str.c_str())) { if (self.active_scene_uuid) { + window.set_relative_mouse(false); asset_man.unload_scene(self.active_scene_uuid); } if (asset_man.load_scene(asset_uuid)) { + window.set_relative_mouse(true); + auto *scene_asset = asset_man.get_scene(asset_uuid); + scene_asset->import_module(); self.active_scene_uuid = asset_uuid; } } From cba8cc86435313b8e126f457ad51a2d13469b898 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Thu, 21 Aug 2025 01:13:45 +0300 Subject: [PATCH 09/27] fix iterator error --- Lorr/Runtime/RuntimeModule.cc | 26 ++++++++++++++++---------- xmake/packages.lua | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/Lorr/Runtime/RuntimeModule.cc b/Lorr/Runtime/RuntimeModule.cc index 1e11c528..a2fc1c4b 100644 --- a/Lorr/Runtime/RuntimeModule.cc +++ b/Lorr/Runtime/RuntimeModule.cc @@ -93,6 +93,7 @@ auto RuntimeModule::update(this RuntimeModule &self, f64 delta_time) -> void { if (ImGui::Begin("Runtime")) { const auto &registry = asset_man.get_registry(); + auto loading_scene_uuid = lr::UUID(nullptr); for (const auto &[asset_uuid, asset] : registry) { if (asset.type != lr::AssetType::Scene) { continue; @@ -100,19 +101,24 @@ auto RuntimeModule::update(this RuntimeModule &self, f64 delta_time) ->
void { const auto &path_str = asset.path.string(); if (ImGui::Button(path_str.c_str())) { - if (self.active_scene_uuid) { - window.set_relative_mouse(false); - asset_man.unload_scene(self.active_scene_uuid); - } + loading_scene_uuid = asset_uuid; + } + } - if (asset_man.load_scene(asset_uuid)) { - window.set_relative_mouse(true); - auto *scene_asset = asset_man.get_scene(asset_uuid); - scene_asset->import_module(); - self.active_scene_uuid = asset_uuid; - } + if (loading_scene_uuid) { + if (self.active_scene_uuid) { + window.set_relative_mouse(false); + asset_man.unload_scene(self.active_scene_uuid); + } + + if (asset_man.load_scene(loading_scene_uuid)) { + window.set_relative_mouse(true); + auto *scene_asset = asset_man.get_scene(loading_scene_uuid); + scene_asset->import_module(); + self.active_scene_uuid = loading_scene_uuid; } } + } ImGui::End(); diff --git a/xmake/packages.lua b/xmake/packages.lua index 7c01f1e9..5ab82d17 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -60,6 +60,6 @@ add_requires("vuk 2025.07.09", { configs = { }, debug = is_mode("debug") }) add_requires("meshoptimizer v0.24") -add_requires("ktx v4.4.0", { debug = false }) +add_requires("ktx v4.4.0", { debug = true }) add_requires("svector v1.0.3") From e3f2479f3a5bfb96016321d1e7c440d43b477134 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 22 Aug 2025 13:55:01 +0300 Subject: [PATCH 10/27] fix camera mouse movements --- Lorr/Editor/Window/ViewportWindow.cc | 39 +++++++++---------- Lorr/Engine/Math/Rotation.hh | 15 ------- Lorr/Engine/Scene/ECSModule/CoreComponents.hh | 3 -- Lorr/Engine/Scene/EditorCamera.cc | 26 ++++++++++--- Lorr/Engine/Scene/EditorCamera.hh | 6 ++- Lorr/Engine/Scene/Scene.cc | 18 +++++---- Lorr/Engine/Window/Window.cc | 15 ++++++- Lorr/Engine/Window/Window.hh | 2 +- Lorr/Engine/pch.hh | 3 -- Lorr/Runtime/RuntimeModule.cc | 36 +++++++++++------ Lorr/Runtime/RuntimeModule.hh | 1 + 11 files changed, 92 insertions(+), 72 
deletions(-) delete mode 100644 Lorr/Engine/Math/Rotation.hh diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index fa401330..e4c72bfe 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -130,9 +130,10 @@ static auto draw_tools(ViewportWindow &self) -> void { ImGui::drag_vec(0, glm::value_ptr(self.editor_camera.position), 3, ImGuiDataType_Float); ImGui::SeparatorText("Rotation"); - auto camera_rotation_degrees = glm::degrees(self.editor_camera.rotation); - ImGui::drag_vec(1, glm::value_ptr(camera_rotation_degrees), 3, ImGuiDataType_Float); - self.editor_camera.rotation = glm::radians(lr::Math::normalize_180(camera_rotation_degrees)); + auto camera_yaw_pitch = glm::vec2(self.editor_camera.yaw, self.editor_camera.pitch); + ImGui::drag_vec(1, glm::value_ptr(camera_yaw_pitch), 2, ImGuiDataType_Float); + self.editor_camera.yaw = camera_yaw_pitch.x; + self.editor_camera.pitch = camera_yaw_pitch.y; ImGui::SeparatorText("FoV"); ImGui::drag_vec(2, &self.editor_camera.fov, 1, ImGuiDataType_Float); @@ -200,19 +201,19 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 auto target_velocity = glm::vec3(0.0f); if (!ImGuizmo::IsUsingAny() && ImGui::IsWindowHovered()) { if (ImGui::IsKeyDown(ImGuiKey_W)) { - target_velocity.z = -self.editor_camera.max_velocity; + target_velocity.x = self.editor_camera.max_velocity; } if (ImGui::IsKeyDown(ImGuiKey_S)) { - target_velocity.z = self.editor_camera.max_velocity; - } - - if (ImGui::IsKeyDown(ImGuiKey_A)) { target_velocity.x = -self.editor_camera.max_velocity; } if (ImGui::IsKeyDown(ImGuiKey_D)) { - target_velocity.x = self.editor_camera.max_velocity; + target_velocity.z = self.editor_camera.max_velocity; + } + + if (ImGui::IsKeyDown(ImGuiKey_A)) { + target_velocity.z = -self.editor_camera.max_velocity; } if (ImGui::IsKeyDown(ImGuiKey_E)) { @@ -223,15 +224,14 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format 
format, vuk::Extent3 target_velocity.y = -self.editor_camera.max_velocity; } - if (ImGui::IsMouseDragging(ImGuiMouseButton_Right)) { - auto drag = ImGui::GetMouseDragDelta(ImGuiMouseButton_Right, 0); - ImGui::ResetMouseDragDelta(ImGuiMouseButton_Right); + if (ImGui::IsMouseDragging(ImGuiMouseButton_Left)) { + auto drag = ImGui::GetMouseDragDelta(ImGuiMouseButton_Left, 0); + ImGui::ResetMouseDragDelta(ImGuiMouseButton_Left); auto sensitivity = 0.1f; - auto camera_rotation_degrees = glm::degrees(self.editor_camera.rotation); - camera_rotation_degrees.x += drag.x * sensitivity; - camera_rotation_degrees.y += drag.y * sensitivity; - self.editor_camera.rotation = glm::radians(camera_rotation_degrees); + self.editor_camera.yaw -= drag.x * sensitivity; + self.editor_camera.pitch += drag.y * sensitivity; + self.editor_camera.pitch = glm::clamp(self.editor_camera.pitch, -89.9f, 89.9f); } } @@ -273,15 +273,14 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 } if (selected_entity && selected_entity.has()) { - auto camera_forward = glm::vec3(0.0, 0.0, 1.0) * lr::Math::quat_dir(self.editor_camera.rotation); - auto camera_projection = glm::perspective( + auto camera_direction = self.editor_camera.direction(); + auto camera_projection = glm::perspectiveRH_ZO( glm::radians(self.editor_camera.fov), self.editor_camera.aspect_ratio(), self.editor_camera.far_clip, self.editor_camera.near_clip ); - auto camera_view = glm::lookAt(self.editor_camera.position, self.editor_camera.position + camera_forward, glm::vec3(0.0, 1.0, 0.0)); - camera_projection[1][1] *= -1.0f; + auto camera_view = glm::lookAt(self.editor_camera.position, self.editor_camera.position + camera_direction, glm::vec3(0.0, 1.0, 0.0)); auto *transform = selected_entity.get_mut(); auto T = glm::translate(glm::mat4(1.0), transform->position); diff --git a/Lorr/Engine/Math/Rotation.hh b/Lorr/Engine/Math/Rotation.hh deleted file mode 100644 index c22839ad..00000000 --- 
a/Lorr/Engine/Math/Rotation.hh +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -namespace lr::Math { -// Normalize a vector in degrees form to [-180, 180] -template -constexpr auto normalize_180(const glm::vec &rot) -> glm::vec { - return glm::mod(rot + 180.0f, glm::vec(360.0f)) - 180.0f; -} - -// Normalize a vector in degrees form to [-90, 90] -template -constexpr auto normalize_90(const glm::vec &rot) -> glm::vec { - return glm::mod(rot + 90.0f, glm::vec(180.0f)) - 90.0f; -} -} // namespace lr::Math diff --git a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh index 5d21439a..94adee66 100644 --- a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh +++ b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh @@ -22,9 +22,6 @@ ECS_COMPONENT_BEGIN(Transform) ECS_COMPONENT_END(); ECS_COMPONENT_BEGIN(Camera) - ECS_COMPONENT_MEMBER(position, glm::vec3, {0.0f, 0.0f, 0.0f}) - ECS_COMPONENT_MEMBER(yaw, f32, 0.0f) - ECS_COMPONENT_MEMBER(pitch, f32, 0.0f) ECS_COMPONENT_MEMBER(fov, f32, 90.0f) ECS_COMPONENT_MEMBER(resolution, glm::vec2, {}) ECS_COMPONENT_MEMBER(near_clip, f32, 0.1f) diff --git a/Lorr/Engine/Scene/EditorCamera.cc b/Lorr/Engine/Scene/EditorCamera.cc index 26956765..80a7a2d6 100644 --- a/Lorr/Engine/Scene/EditorCamera.cc +++ b/Lorr/Engine/Scene/EditorCamera.cc @@ -4,17 +4,20 @@ namespace lr { auto EditorCamera::update(this EditorCamera &self, f32 delta_time, const glm::vec3 &target_velocity) -> void { ZoneScoped; - auto inv_orient = glm::conjugate(lr::Math::quat_dir(self.rotation)); - auto acceleration_rate = glm::length(target_velocity) > 0.0f ? self.accel_speed : self.decel_speed; + auto direction = self.direction(); + auto up = glm::vec3(0.0f, 1.0f, 0.0f); + auto right = glm::normalize(glm::cross(direction, up)); + + auto cur_speed = glm::length(target_velocity); + auto acceleration_rate = cur_speed > 0.0f ? 
self.accel_speed : self.decel_speed; self.velocity = glm::mix(self.velocity, target_velocity, glm::min(1.0f, acceleration_rate * delta_time)); - self.position += inv_orient * self.velocity * delta_time; + auto world_velocity = self.velocity.x * direction + self.velocity.y * up + self.velocity.z * right; + self.position += world_velocity * delta_time; auto projection_mat = glm::perspectiveRH_ZO(glm::radians(self.fov), self.aspect_ratio(), self.far_clip, self.near_clip); projection_mat[1][1] *= -1.0f; - auto translation_mat = glm::translate(glm::mat4(1.0f), -self.position); - auto rotation_mat = glm::mat4_cast(lr::Math::quat_dir(self.rotation)); - auto view_mat = rotation_mat * translation_mat; + auto view_mat = glm::lookAt(self.position, self.position + direction, up); auto projection_view_mat = projection_mat * view_mat; self.frustum_projection_view_mat = self.projection_view_mat; @@ -25,4 +28,15 @@ auto EditorCamera::update(this EditorCamera &self, f32 delta_time, const glm::ve self.inv_projection_view_mat = glm::inverse(projection_view_mat); self.acceptable_lod_error = 2.0f; } + +auto EditorCamera::direction(this EditorCamera &self) -> glm::vec3 { + ZoneScoped; + + auto direction = glm::vec3( + glm::cos(glm::radians(self.yaw)) * glm::cos(glm::radians(self.pitch)), + glm::sin(glm::radians(self.pitch)), + glm::sin(glm::radians(self.yaw)) * glm::cos(glm::radians(self.pitch)) + ); + return glm::normalize(direction); +} } // namespace lr diff --git a/Lorr/Engine/Scene/EditorCamera.hh b/Lorr/Engine/Scene/EditorCamera.hh index 990ea5cb..7f18841c 100644 --- a/Lorr/Engine/Scene/EditorCamera.hh +++ b/Lorr/Engine/Scene/EditorCamera.hh @@ -4,15 +4,17 @@ namespace lr { struct EditorCamera : GPU::Camera { - glm::vec3 rotation = {}; - glm::vec3 velocity = {}; + f32 yaw = 0.0f; + f32 pitch = 0.0f; f32 fov = 65.0f; + glm::vec3 velocity = {}; f32 max_velocity = 2.0f; f32 accel_speed = 8.0f; f32 decel_speed = 12.0f; auto update(this EditorCamera &, f32 delta_time, const glm::vec3 
&target_velocity) -> void; + auto direction(this EditorCamera &) -> glm::vec3; auto aspect_ratio() -> f32 { return resolution.x / resolution.y; } diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 31e7497f..a2da98f5 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -4,6 +4,7 @@ #include "Engine/Core/App.hh" +#include "Engine/Math/Quat.hh" #include "Engine/Memory/Stack.hh" #include "Engine/OS/File.hh" @@ -399,7 +400,8 @@ auto Scene::create_perspective_camera(this Scene &self, const std::string &name, return self .create_entity(name) // .add() - .set({ .position = position, .yaw = yaw, .pitch = pitch, .fov = fov }) + .set({ .position = position, .rotation = { yaw, pitch, 0.0f } }) + .set({ .fov = fov }) .child_of(self.root); } @@ -577,7 +579,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< // clang-format off auto camera_query = self.get_world() - .query_builder() + .query_builder() .build(); auto rendering_meshes_query = self.get_world() .query_builder() @@ -589,18 +591,18 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< ls::option active_camera_data = override_camera; if (!active_camera_data.has_value()) { - camera_query.each([&active_camera_data](flecs::entity, ECS::Camera &c, ECS::ActiveCamera) { + camera_query.each([&active_camera_data](flecs::entity, ECS::Transform &t, ECS::Camera &c, ECS::ActiveCamera) { auto aspect_ratio = c.resolution.x / c.resolution.y; auto projection_mat = glm::perspectiveRH_ZO(glm::radians(c.fov), aspect_ratio, c.far_clip, c.near_clip); projection_mat[1][1] *= -1; auto direction = glm::vec3( - glm::cos(glm::radians(c.yaw)) * glm::cos(glm::radians(c.pitch)), - glm::sin(glm::radians(c.pitch)), - glm::sin(glm::radians(c.yaw)) * glm::cos(glm::radians(c.pitch)) + glm::cos(glm::radians(t.rotation.x)) * glm::cos(glm::radians(t.rotation.y)), + glm::sin(glm::radians(t.rotation.y)), + glm::sin(glm::radians(t.rotation.x)) * 
glm::cos(glm::radians(t.rotation.y)) ); direction = glm::normalize(direction); - auto view_mat = glm::lookAt(c.position, c.position + direction, glm::vec3(0.0f, 1.0f, 0.0f)); + auto view_mat = glm::lookAt(t.position, t.position + direction, glm::vec3(0.0f, 1.0f, 0.0f)); auto &camera_data = active_camera_data.emplace(GPU::Camera{}); camera_data.projection_mat = projection_mat; @@ -608,7 +610,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< camera_data.projection_view_mat = camera_data.projection_mat * camera_data.view_mat; camera_data.inv_view_mat = glm::inverse(camera_data.view_mat); camera_data.inv_projection_view_mat = glm::inverse(camera_data.projection_view_mat); - camera_data.position = c.position; + camera_data.position = t.position; camera_data.near_clip = c.near_clip; camera_data.far_clip = c.far_clip; camera_data.resolution = c.resolution; diff --git a/Lorr/Engine/Window/Window.cc b/Lorr/Engine/Window/Window.cc index d2e362f6..6cfa1bb5 100644 --- a/Lorr/Engine/Window/Window.cc +++ b/Lorr/Engine/Window/Window.cc @@ -155,6 +155,8 @@ auto Window::update(this Window &self, f64) -> void { auto surface = self.get_surface(device.get_instance()); self.swap_chain = device.create_swap_chain(surface, std::move(self.swap_chain)).value(); + + self.set_relative_mouse(SDL_GetWindowRelativeMouseMode(self.handle)); } break; case SDL_EVENT_KEY_DOWN: { auto state = KeyState::Up; @@ -166,7 +168,6 @@ auto Window::update(this Window &self, f64) -> void { self.key_events[e.key.scancode] = { .key = e.key.key, .mod = e.key.mod, .state = KeyState::Up }; } break; case SDL_EVENT_MOUSE_MOTION: { - self.mouse_pos_delta = glm::vec2(e.motion.xrel, e.motion.yrel); self.mouse_pos = glm::vec2(e.motion.x, e.motion.y); self.mouse_moved = true; } break; @@ -193,10 +194,20 @@ auto Window::check_key_state(this Window &self, SDL_Scancode scancode, KeyState auto Window::set_relative_mouse(this Window &self, bool enabled) -> void { ZoneScoped; - + auto 
center_rect = SDL_Rect{ .x = self.width / 2, .y = self.height / 2, .w = 1, .h = 1 }; + SDL_SetWindowMouseRect(self.handle, &center_rect); SDL_SetWindowRelativeMouseMode(self.handle, enabled); } +auto Window::get_delta_mouse_pos(this Window &) -> glm::vec2 { + ZoneScoped; + + auto result = glm::vec2(); + SDL_GetRelativeMouseState(&result.x, &result.y); + + return result; +} + auto Window::set_cursor(this Window &self, WindowCursor cursor) -> void { ZoneScoped; diff --git a/Lorr/Engine/Window/Window.hh b/Lorr/Engine/Window/Window.hh index 50bed01c..10f88abf 100644 --- a/Lorr/Engine/Window/Window.hh +++ b/Lorr/Engine/Window/Window.hh @@ -102,7 +102,6 @@ struct Window { std::vector> event_listeners = {}; ankerl::unordered_dense::map key_events = {}; glm::vec2 mouse_pos = {}; - glm::vec2 mouse_pos_delta = {}; bool mouse_moved = false; static auto init_sdl() -> bool; @@ -121,6 +120,7 @@ struct Window { auto check_key_state(this Window &, SDL_Scancode scancode, KeyState state) -> bool; auto set_relative_mouse(this Window &, bool enabled) -> void; + auto get_delta_mouse_pos(this Window &) -> glm::vec2; auto set_cursor(this Window &, WindowCursor cursor) -> void; auto get_cursor(this Window &) -> WindowCursor; auto show_cursor(this Window &, bool show) -> void; diff --git a/Lorr/Engine/pch.hh b/Lorr/Engine/pch.hh index e985c224..1327f8af 100755 --- a/Lorr/Engine/pch.hh +++ b/Lorr/Engine/pch.hh @@ -40,9 +40,6 @@ namespace fs = std::filesystem; #include #include -#include "Engine/Math/Quat.hh" -#include "Engine/Math/Rotation.hh" - #include "Core/Logger.hh" #include #include diff --git a/Lorr/Runtime/RuntimeModule.cc b/Lorr/Runtime/RuntimeModule.cc index a2fc1c4b..224cc780 100644 --- a/Lorr/Runtime/RuntimeModule.cc +++ b/Lorr/Runtime/RuntimeModule.cc @@ -11,23 +11,23 @@ struct Runtime { Runtime(flecs::world &world) { auto &window = lr::App::mod(); world - .system() // - .each([&](flecs::iter &it, usize, lr::ECS::Camera &c, lr::ECS::ActiveCamera) { + .system() // + 
.each([&](flecs::iter &it, usize, lr::ECS::Transform &t, lr::ECS::Camera &c, lr::ECS::ActiveCamera) { auto target_velocity = glm::vec3(0.0f); if (window.check_key_state(SDL_SCANCODE_W, lr::KeyState::Down)) { - target_velocity.z = -c.max_velocity; + target_velocity.x = c.max_velocity; } if (window.check_key_state(SDL_SCANCODE_S, lr::KeyState::Down)) { - target_velocity.z = c.max_velocity; - } - - if (window.check_key_state(SDL_SCANCODE_A, lr::KeyState::Down)) { target_velocity.x = -c.max_velocity; } if (window.check_key_state(SDL_SCANCODE_D, lr::KeyState::Down)) { - target_velocity.x = c.max_velocity; + target_velocity.z = c.max_velocity; + } + + if (window.check_key_state(SDL_SCANCODE_A, lr::KeyState::Down)) { + target_velocity.z = -c.max_velocity; } if (window.check_key_state(SDL_SCANCODE_E, lr::KeyState::Down)) { @@ -39,13 +39,26 @@ struct Runtime { } if (window.mouse_moved) { - auto sensitivity = 0.2f; - c.yaw += window.mouse_pos_delta.x * sensitivity; - c.pitch = glm::clamp(c.pitch - window.mouse_pos_delta.y * sensitivity, -89.9f, 89.9f); + auto mouse_pos_delta = window.get_delta_mouse_pos(); + auto sensitivity = 0.1f; + t.rotation.x += mouse_pos_delta.x * sensitivity; + t.rotation.y -= mouse_pos_delta.y * sensitivity; + t.rotation.y = glm::clamp(t.rotation.y, -89.9f, 89.9f); } auto acceleration_rate = glm::length(target_velocity) > 0.0f ? 
c.accel_speed : c.decel_speed; c.velocity = glm::mix(c.velocity, target_velocity, glm::min(1.0f, acceleration_rate * it.delta_time())); + + auto direction = glm::vec3( + glm::cos(glm::radians(t.rotation.x)) * glm::cos(glm::radians(t.rotation.y)), + glm::sin(glm::radians(t.rotation.y)), + glm::sin(glm::radians(t.rotation.x)) * glm::cos(glm::radians(t.rotation.y)) + ); + direction = glm::normalize(direction); + auto up = glm::vec3(0.0f, 1.0f, 0.0f); + auto right = glm::normalize(glm::cross(direction, up)); + auto world_velocity = c.velocity.x * direction + c.velocity.y * up + c.velocity.z * right; + t.position += world_velocity * it.delta_time(); }); } }; @@ -118,7 +131,6 @@ auto RuntimeModule::update(this RuntimeModule &self, f64 delta_time) -> void { self.active_scene_uuid = loading_scene_uuid; } } - } ImGui::End(); diff --git a/Lorr/Runtime/RuntimeModule.hh b/Lorr/Runtime/RuntimeModule.hh index 9ade34b5..e96d3998 100644 --- a/Lorr/Runtime/RuntimeModule.hh +++ b/Lorr/Runtime/RuntimeModule.hh @@ -5,6 +5,7 @@ struct RuntimeModule { static constexpr auto MODULE_NAME = "Runtime"; + bool debugging = false; fs::path world_path = {}; lr::UUID active_scene_uuid = lr::UUID(nullptr); From 69432028dd8ca8ae4c75e0f07e8dfc0e20b73a5a Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 22 Aug 2025 20:41:22 +0300 Subject: [PATCH 11/27] reorganize scenerenderer --- Lorr/Editor/Window/ViewportWindow.cc | 4 +- Lorr/Engine/Asset/Asset.cc | 2 +- .../shaders/passes/cull_meshlets.slang | 74 +- Lorr/Engine/Scene/SceneRenderer.cc | 769 +++++++++++------- Lorr/Engine/Scene/SceneRenderer.hh | 5 +- Lorr/Engine/Window/Window.cc | 6 +- xmake/packages.lua | 2 +- 7 files changed, 509 insertions(+), 353 deletions(-) diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index e4c72bfe..46e8cd08 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -229,8 +229,8 @@ static auto 
draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 ImGui::ResetMouseDragDelta(ImGuiMouseButton_Left); auto sensitivity = 0.1f; - self.editor_camera.yaw -= drag.x * sensitivity; - self.editor_camera.pitch += drag.y * sensitivity; + self.editor_camera.yaw += drag.x * sensitivity; + self.editor_camera.pitch -= drag.y * sensitivity; self.editor_camera.pitch = glm::clamp(self.editor_camera.pitch, -89.9f, 89.9f); } } diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 71691ccf..81934d49 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -825,7 +825,7 @@ auto AssetManager::load_model(this AssetManager &self, const UUID &uuid) -> bool nullptr, lod_index_count, TARGET_ERROR, - meshopt_SimplifyLockBorder, + meshopt_SimplifyLockBorder | meshopt_SimplifyPermissive, &result_error ); diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index d547632f..d55e325b 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -6,55 +6,63 @@ import debug_drawer; #include -struct ShaderParameters { - ConstantBuffer camera; - StructuredBuffer meshlet_instances; - StructuredBuffer mesh_instances; - StructuredBuffer meshes; - StructuredBuffer transforms; - Image2D hiz_image; - Sampler hiz_sampler; - StructuredBuffer meshlet_instances_count; - - RWStructuredBuffer cull_triangles_cmd; - RWStructuredBuffer visible_meshlet_instances_indices; - RWStructuredBuffer debug_drawer; -}; +[[vk::constant_id(0)]] const u32 LATE = 0; +[[vk::binding(0)]] ConstantBuffer camera; +[[vk::binding(1)]] StructuredBuffer meshlet_instances; +[[vk::binding(2)]] StructuredBuffer mesh_instances; +[[vk::binding(3)]] StructuredBuffer meshes; +[[vk::binding(4)]] StructuredBuffer transforms; +[[vk::binding(5)]] Image2D hiz_image; +[[vk::binding(6)]] Sampler hiz_sampler; +[[vk::binding(7)]] StructuredBuffer 
visible_meshlet_instances_count; +[[vk::binding(8)]] RWStructuredBuffer meshlet_visibility_mask; +[[vk::binding(9)]] RWStructuredBuffer cull_triangles_cmd; +[[vk::binding(10)]] RWStructuredBuffer visible_meshlet_instances_indices; +[[vk::binding(11)]] RWStructuredBuffer debug_drawer; #ifndef CULLING_MESHLET_COUNT #define CULLING_MESHLET_COUNT 64 #endif +// TODO: Replace this when vuk can support bool constants +constexpr static bool IS_EARLY = LATE == 0; +constexpr static bool IS_LATE = LATE == 1; + [[shader("compute")]] [[numthreads(CULLING_MESHLET_COUNT, 1, 1)]] func cs_main( uint3 thread_id : SV_DispatchThreadID, - uniform ParameterBlock params, - uniform CullFlags cull_flags + uniform CullFlags cull_flags, ) -> void { - let meshlet_instance_count = params.meshlet_instances_count[0]; + let meshlet_instance_count = visible_meshlet_instances_count[0]; let meshlet_instance_index = thread_id.x; if (meshlet_instance_index >= meshlet_instance_count) { return; } - let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; - let mesh = params.meshes[mesh_instance.mesh_index]; - let transform = params.transforms[mesh_instance.transform_index]; + let meshlet_instance = meshlet_instances[meshlet_instance_index]; + let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = meshes[mesh_instance.mesh_index]; + let transform = transforms[mesh_instance.transform_index]; let mesh_lod = mesh.lods[mesh_instance.lod_index]; let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; var visible = true; + + let mask_index = meshlet_instance_index / 32; + let bit_index = meshlet_instance_index - mask_index * 32; + let visibility_bit = 1 << bit_index; + let was_visible = bool(meshlet_visibility_mask[mask_index] & visibility_bit); + if (visible && (cull_flags & CullFlags::MeshletFrustum)) { - let cur_mvp = mul(params.camera.projection_view_mat, 
transform.world); + let cur_mvp = mul(camera.projection_view_mat, transform.world); visible = test_frustum(cur_mvp, bounds.aabb_center, bounds.aabb_extent); } - if (visible && (cull_flags & CullFlags::Occlusion)) { - let prev_mvp = mul(params.camera.frustum_projection_view_mat, transform.world); - if (let screen_aabb = project_aabb(prev_mvp, params.camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, params.hiz_image, params.hiz_sampler); + if (IS_LATE && visible && (cull_flags & CullFlags::Occlusion)) { + let prev_mvp = mul(camera.frustum_projection_view_mat, transform.world); + if (let screen_aabb = project_aabb(prev_mvp, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler); if (visible && true) { let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; @@ -63,13 +71,21 @@ func cs_main( debug_rect.extent = ndc_aabb_max - ndc_aabb_min; debug_rect.color = f32x3(1.0, 0.0, 0.0); debug_rect.coord = DebugDrawCoord::NDC; - debug_draw_rect(params.debug_drawer[0], debug_rect); + debug_draw_rect(debug_drawer[0], debug_rect); } } } + if (IS_LATE && (cull_flags & CullFlags::Occlusion)) { + if (visible) { + std::atomic_or(meshlet_visibility_mask[mask_index], visibility_bit, std::memory_order_relaxed); + } else { + std::atomic_and(meshlet_visibility_mask[mask_index], ~visibility_bit, std::memory_order_relaxed); + } + } + if (visible) { - let index = std::atomic_add(params.cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); - params.visible_meshlet_instances_indices[index] = meshlet_instance_index; + let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + visible_meshlet_instances_indices[index] = meshlet_instance_index; } } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index b7bf668d..759849e6 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc 
+++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -12,6 +12,20 @@ enum BindlessDescriptorLayout : u32 { SampledImages = 1, }; +static constexpr auto sampler_min_clamp_reduction_mode = VkSamplerReductionModeCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO, + .pNext = nullptr, + .reductionMode = VK_SAMPLER_REDUCTION_MODE_MIN, +}; +static constexpr auto hiz_sampler_info = vuk::SamplerCreateInfo{ + .pNext = &sampler_min_clamp_reduction_mode, + .magFilter = vuk::Filter::eLinear, + .minFilter = vuk::Filter::eLinear, + .mipmapMode = vuk::SamplerMipmapMode::eNearest, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, +}; + auto SceneRenderer::init(this SceneRenderer &self) -> bool { ZoneScoped; @@ -205,6 +219,12 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in auto &transfer_man = device.transfer_man(); auto prepared_frame = PreparedFrame{}; + auto zero_fill_pass = vuk::make_pass("zero fill", [](vuk::CommandBuffer &command_buffer, VUK_BA(vuk::eTransferWrite) dst) { + command_buffer.fill_buffer(dst, 0_u32); + + return dst; + }); + if (!info.dirty_transform_ids.empty()) { auto rebuild_transforms = !self.transforms_buffer || self.transforms_buffer.data_size() <= info.gpu_transforms.size_bytes(); self.transforms_buffer = self.transforms_buffer.resize(device, info.gpu_transforms.size_bytes()).value(); @@ -214,7 +234,6 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in prepared_frame.transforms_buffer = transfer_man.upload_staging(info.gpu_transforms, self.transforms_buffer); } else { // Buffer is not resized, upload individual transforms. 
- auto dirty_transforms_count = info.dirty_transform_ids.size(); auto dirty_transforms_size_bytes = dirty_transforms_count * sizeof(GPU::Transforms); auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUtoGPU, dirty_transforms_size_bytes); @@ -316,12 +335,22 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in if (info.max_meshlet_instance_count > 0) { prepared_frame.meshlet_instances_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, info.max_meshlet_instance_count * sizeof(GPU::MeshletInstance)); - prepared_frame.visible_meshlet_instances_indices_buffer = - transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, info.max_meshlet_instance_count * sizeof(u32)); - prepared_frame.reordered_indices_buffer = transfer_man.alloc_transient_buffer( - vuk::MemoryUsage::eGPUonly, - info.max_meshlet_instance_count * Model::MAX_MESHLET_PRIMITIVES * 3 * sizeof(u32) - ); + + auto meshlet_instance_visibility_mask_size_bytes = (info.max_meshlet_instance_count + 31) / 32 * sizeof(u32); + if (meshlet_instance_visibility_mask_size_bytes > self.meshlet_instance_visibility_mask_buffer.data_size()) { + self.meshlet_instance_visibility_mask_buffer = + self.meshlet_instance_visibility_mask_buffer.resize(device, meshlet_instance_visibility_mask_size_bytes).value(); + prepared_frame.meshlet_instance_visibility_mask_buffer = + self.meshlet_instance_visibility_mask_buffer.acquire(device, "meshlet instance visibility mask", vuk::eNone); + prepared_frame.meshlet_instance_visibility_mask_buffer = + zero_fill_pass(std::move(prepared_frame.meshlet_instance_visibility_mask_buffer)); + } else { + prepared_frame.meshlet_instance_visibility_mask_buffer = + self.meshlet_instance_visibility_mask_buffer.acquire(device, "meshlet instance visibility mask", vuk::eMemoryRead); + } + } else { + device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); + self.meshlet_instance_visibility_mask_buffer = {}; } 
info.environment.transmittance_lut_size = self.sky_transmittance_lut_view.extent(); @@ -332,6 +361,7 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in prepared_frame.camera_buffer = transfer_man.scratch_buffer(info.camera); prepared_frame.mesh_instance_count = info.mesh_instance_count; + prepared_frame.max_meshlet_instance_count = info.max_meshlet_instance_count; prepared_frame.environment_flags = static_cast(info.environment.flags); if (info.regenerate_sky || !self.sky_transmittance_lut_view) { @@ -391,6 +421,350 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in return prepared_frame; } +static auto cull_scene( + bool late_pass, + u32 max_meshlet_instance_count, + GPU::CullFlags cull_flags, + TransferManager &transfer_man, + vuk::Value &hiz_attachment, + vuk::Value &cull_meshlets_cmd_buffer, + vuk::Value &visible_meshlet_instances_count_buffer, + vuk::Value &meshlet_instance_visibility_mask_buffer, + vuk::Value &meshes_buffer, + vuk::Value &mesh_instances_buffer, + vuk::Value &meshlet_instances_buffer, + vuk::Value &transforms_buffer, + vuk::Value &camera_buffer, + vuk::Value &debug_drawer_buffer +) -> std::tuple, vuk::Value> { + ZoneScoped; + + // ── CULL MESHLETS ─────────────────────────────────────────────────── + auto vis_cull_meshlets_pass = vuk::make_pass( + "vis cull meshlets", + [late_pass, cull_flags]( + vuk::CommandBuffer &cmd_list, + VUK_BA(vuk::eIndirectRead) dispatch_cmd, + VUK_BA(vuk::eComputeRead) camera, + VUK_BA(vuk::eComputeRead) meshlet_instances, + VUK_BA(vuk::eComputeRead) mesh_instances, + VUK_BA(vuk::eComputeRead) meshes, + VUK_BA(vuk::eComputeRead) transforms, + VUK_IA(vuk::eComputeRead) hiz, + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) meshlet_instance_visibility_mask, + VUK_BA(vuk::eComputeRW) cull_triangles_cmd, + VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, + VUK_BA(vuk::eComputeRW) debug_drawer + ) { + 
cmd_list // + .bind_compute_pipeline("passes.cull_meshlets") + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) + .bind_buffer(0, 3, meshes) + .bind_buffer(0, 4, transforms) + .bind_image(0, 5, hiz) + .bind_sampler(0, 6, hiz_sampler_info) + .bind_buffer(0, 7, visible_meshlet_instances_count) + .bind_buffer(0, 8, meshlet_instance_visibility_mask) + .bind_buffer(0, 9, cull_triangles_cmd) + .bind_buffer(0, 10, visible_meshlet_instances_indices) + .bind_buffer(0, 11, debug_drawer) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) + .specialize_constants(0, late_pass ? 1 : 0) + .dispatch_indirect(dispatch_cmd); + + return std::make_tuple( + dispatch_cmd, + camera, + meshlet_instances, + mesh_instances, + meshes, + transforms, + hiz, + visible_meshlet_instances_count, + meshlet_instance_visibility_mask, + cull_triangles_cmd, + visible_meshlet_instances_indices, + debug_drawer + ); + } + ); + + auto cull_triangles_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); + auto visible_meshlet_instances_indices_buffer = + transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, max_meshlet_instance_count * sizeof(u32)); + + std::tie( + cull_meshlets_cmd_buffer, + camera_buffer, + meshlet_instances_buffer, + mesh_instances_buffer, + meshes_buffer, + transforms_buffer, + hiz_attachment, + visible_meshlet_instances_count_buffer, + meshlet_instance_visibility_mask_buffer, + cull_triangles_cmd_buffer, + visible_meshlet_instances_indices_buffer, + debug_drawer_buffer + ) = + vis_cull_meshlets_pass( + std::move(cull_meshlets_cmd_buffer), + std::move(camera_buffer), + std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), + std::move(meshes_buffer), + std::move(transforms_buffer), + std::move(hiz_attachment), + std::move(visible_meshlet_instances_count_buffer), + std::move(meshlet_instance_visibility_mask_buffer), + std::move(cull_triangles_cmd_buffer), + 
std::move(visible_meshlet_instances_indices_buffer), + std::move(debug_drawer_buffer) + ); + + // ── CULL TRIANGLES ────────────────────────────────────────────────── + auto vis_cull_triangles_pass = vuk::make_pass( + "vis cull triangles", + [cull_flags]( + vuk::CommandBuffer &cmd_list, + VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, + VUK_BA(vuk::eComputeRead) camera, + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_indices, + VUK_BA(vuk::eComputeRead) meshlet_instances, + VUK_BA(vuk::eComputeRead) mesh_instances, + VUK_BA(vuk::eComputeRead) meshes, + VUK_BA(vuk::eComputeRead) transforms, + VUK_BA(vuk::eComputeRW) draw_indexed_cmd, + VUK_BA(vuk::eComputeWrite) reordered_indices + ) { + cmd_list // + .bind_compute_pipeline("passes.cull_triangles") + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, visible_meshlet_instances_indices) + .bind_buffer(0, 2, meshlet_instances) + .bind_buffer(0, 3, mesh_instances) + .bind_buffer(0, 4, meshes) + .bind_buffer(0, 5, transforms) + .bind_buffer(0, 6, draw_indexed_cmd) + .bind_buffer(0, 7, reordered_indices) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) + .dispatch_indirect(cull_triangles_cmd); + + return std::make_tuple( + camera, + visible_meshlet_instances_indices, + meshlet_instances, + mesh_instances, + meshes, + transforms, + draw_indexed_cmd, + reordered_indices + ); + } + ); + + auto draw_command_buffer = transfer_man.scratch_buffer({ .instanceCount = 1 }); + auto reordered_indices_buffer = + transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, max_meshlet_instance_count * Model::MAX_MESHLET_PRIMITIVES * 3 * sizeof(u32)); + + std::tie( + camera_buffer, + visible_meshlet_instances_indices_buffer, + meshlet_instances_buffer, + mesh_instances_buffer, + meshes_buffer, + transforms_buffer, + draw_command_buffer, + reordered_indices_buffer + ) = + vis_cull_triangles_pass( + std::move(cull_triangles_cmd_buffer), + std::move(camera_buffer), + 
std::move(visible_meshlet_instances_indices_buffer), + std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), + std::move(meshes_buffer), + std::move(transforms_buffer), + std::move(draw_command_buffer), + std::move(reordered_indices_buffer) + ); + + return std::make_tuple(draw_command_buffer, reordered_indices_buffer); +} + +static auto draw_visbuffer( + vuk::PersistentDescriptorSet &descriptor_set, + vuk::Value &depth_attachment, + vuk::Value &visbuffer_attachment, + vuk::Value &overdraw_attachment, + vuk::Value &draw_command_buffer, + vuk::Value &reordered_indices_buffer, + vuk::Value &meshes_buffer, + vuk::Value &mesh_instances_buffer, + vuk::Value &meshlet_instances_buffer, + vuk::Value &transforms_buffer, + vuk::Value &materials_buffer, + vuk::Value &camera_buffer +) -> void { + ZoneScoped; + + auto vis_clear_pass = vuk::make_pass( + "vis clear", + [](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeWrite) visbuffer, + VUK_IA(vuk::eComputeWrite) overdraw) { + cmd_list // + .bind_compute_pipeline("passes.visbuffer_clear") + .bind_image(0, 0, visbuffer) + .bind_image(0, 1, overdraw) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(glm::uvec2(visbuffer->extent.width, visbuffer->extent.height))) + .dispatch_invocations_per_pixel(visbuffer); + + return std::make_tuple(visbuffer, overdraw); + } + ); + + std::tie(visbuffer_attachment, overdraw_attachment) = vis_clear_pass(std::move(visbuffer_attachment), std::move(overdraw_attachment)); + + auto vis_encode_pass = vuk::make_pass( + "vis encode", + [&descriptor_set]( + vuk::CommandBuffer &cmd_list, + VUK_BA(vuk::eIndirectRead) triangle_indirect, + VUK_BA(vuk::eIndexRead) index_buffer, + VUK_BA(vuk::eVertexRead) camera, + VUK_BA(vuk::eVertexRead) meshlet_instances, + VUK_BA(vuk::eVertexRead) mesh_instances, + VUK_BA(vuk::eVertexRead) meshes, + VUK_BA(vuk::eVertexRead) transforms, + VUK_BA(vuk::eFragmentRead) materials, + VUK_IA(vuk::eColorRW) visbuffer, + 
VUK_IA(vuk::eDepthStencilRW) depth, + VUK_IA(vuk::eFragmentRW) overdraw + ) { + cmd_list // + .bind_graphics_pipeline("passes.visbuffer_encode") + .set_rasterization({ .cullMode = vuk::CullModeFlagBits::eBack }) + .set_depth_stencil({ .depthTestEnable = true, .depthWriteEnable = true, .depthCompareOp = vuk::CompareOp::eGreaterOrEqual }) + .set_color_blend(visbuffer, vuk::BlendPreset::eOff) + .set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor) + .set_viewport(0, vuk::Rect2D::framebuffer()) + .set_scissor(0, vuk::Rect2D::framebuffer()) + .bind_persistent(1, descriptor_set) + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) + .bind_buffer(0, 3, meshes) + .bind_buffer(0, 4, transforms) + .bind_buffer(0, 5, materials) + .bind_image(0, 6, overdraw) + .bind_index_buffer(index_buffer, vuk::IndexType::eUint32) + .draw_indexed_indirect(1, triangle_indirect); + + return std::make_tuple(camera, meshlet_instances, mesh_instances, meshes, transforms, materials, visbuffer, depth, overdraw); + } + ); + + std::tie( + camera_buffer, + meshlet_instances_buffer, + mesh_instances_buffer, + meshes_buffer, + transforms_buffer, + materials_buffer, + visbuffer_attachment, + depth_attachment, + overdraw_attachment + ) = + vis_encode_pass( + std::move(draw_command_buffer), + std::move(reordered_indices_buffer), + std::move(camera_buffer), + std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), + std::move(meshes_buffer), + std::move(transforms_buffer), + std::move(materials_buffer), + std::move(visbuffer_attachment), + std::move(depth_attachment), + std::move(overdraw_attachment) + ); +} + +static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Value &depth_attachment) -> void { + ZoneScoped; + + auto hiz_generate_pass = vuk::make_pass( + "hiz generate", + [](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeSampled) src, + VUK_IA(vuk::eComputeRW) dst) { + auto extent = 
dst->extent; + auto mip_count = dst->level_count; + LS_EXPECT(mip_count < 13); + + auto dispatch_x = (extent.width + 63) >> 6; + auto dispatch_y = (extent.height + 63) >> 6; + + cmd_list // + .bind_compute_pipeline("passes.hiz") + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mip_count, (dispatch_x * dispatch_y) - 1, glm::mat2(1.0f))) + .specialize_constants(0, extent.width == extent.height && (extent.width & (extent.width - 1)) == 0 ? 1u : 0u) + .specialize_constants(1, extent.width) + .specialize_constants(2, extent.height); + + *cmd_list.scratch_buffer(0, 0) = 0; + cmd_list.bind_sampler(0, 1, hiz_sampler_info); + cmd_list.bind_image(0, 2, src); + + for (u32 i = 0; i < 13; i++) { + cmd_list.bind_image(0, i + 3, dst->mip(ls::min(i, mip_count - 1_u32))); + } + + cmd_list.dispatch(dispatch_x, dispatch_y); + + return std::make_tuple(src, dst); + } + ); + + auto hiz_generate_slow_pass = vuk::make_pass( + "hiz generate slow", + [](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeSampled) src, + VUK_IA(vuk::eComputeRW) dst) { + auto extent = dst->extent; + auto mip_count = dst->level_count; + + cmd_list // + .bind_compute_pipeline("passes.hiz_slow") + .bind_sampler(0, 0, hiz_sampler_info); + + for (auto i = 0_u32; i < mip_count; i++) { + auto mip_width = std::max(1_u32, extent.width >> i); + auto mip_height = std::max(1_u32, extent.height >> i); + + auto mip = dst->mip(i); + if (i == 0) { + cmd_list.bind_image(0, 1, src); + } else { + cmd_list.bind_image(0, 1, dst->mip(i - 1)); + } + + cmd_list.bind_image(0, 2, mip); + cmd_list.push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mip_width, mip_height)); + cmd_list.dispatch_invocations(mip_width, mip_height); + } + + return std::make_tuple(src, dst); + } + ); + + std::tie(depth_attachment, hiz_attachment) = hiz_generate_pass(std::move(depth_attachment), std::move(hiz_attachment)); +} + static auto draw_sky( SceneRenderer &self, vuk::Value &dst_attachment, @@ -637,19 +1011,6 @@ 
auto SceneRenderer::render(this SceneRenderer &self, vuk::Value({ .x = 0, .y = 1, .z = 1 }); - auto visible_meshlet_instances_indices_buffer = std::move(frame.visible_meshlet_instances_indices_buffer); - - std::tie( - camera_buffer, - meshlet_instances_buffer, - mesh_instances_buffer, + auto [early_draw_command_buffer, early_reordered_indices_buffer] = cull_scene( + false, + frame.max_meshlet_instance_count, + info.cull_flags, + transfer_man, + hiz_attachment, + cull_meshlets_cmd_buffer, + visible_meshlet_instances_count_buffer, + meshlet_visibility_mask_buffer, meshes_buffer, + mesh_instances_buffer, + meshlet_instances_buffer, transforms_buffer, - hiz_attachment, - cull_triangles_cmd_buffer, - visible_meshlet_instances_indices_buffer, + camera_buffer, debug_drawer_buffer - ) = - vis_cull_meshlets_pass( - std::move(cull_meshlets_cmd_buffer), - std::move(camera_buffer), - std::move(meshlet_instances_buffer), - std::move(mesh_instances_buffer), - std::move(meshes_buffer), - std::move(transforms_buffer), - std::move(hiz_attachment), - std::move(visible_meshlet_instances_count_buffer), - std::move(cull_triangles_cmd_buffer), - std::move(visible_meshlet_instances_indices_buffer), - std::move(debug_drawer_buffer) - ); - - // ── CULL TRIANGLES ────────────────────────────────────────────────── - auto vis_cull_triangles_pass = vuk::make_pass( - "vis cull triangles", - [cull_flags = info.cull_flags]( - vuk::CommandBuffer &cmd_list, - VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, - VUK_BA(vuk::eComputeRead) camera, - VUK_BA(vuk::eComputeRead) visible_meshlet_instances_indices, - VUK_BA(vuk::eComputeRead) meshlet_instances, - VUK_BA(vuk::eComputeRead) mesh_instances, - VUK_BA(vuk::eComputeRead) meshes, - VUK_BA(vuk::eComputeRead) transforms, - VUK_BA(vuk::eComputeRW) draw_indexed_cmd, - VUK_BA(vuk::eComputeWrite) reordered_indices - ) { - cmd_list // - .bind_compute_pipeline("passes.cull_triangles") - .bind_buffer(0, 0, camera) - .bind_buffer(0, 1, 
visible_meshlet_instances_indices) - .bind_buffer(0, 2, meshlet_instances) - .bind_buffer(0, 3, mesh_instances) - .bind_buffer(0, 4, meshes) - .bind_buffer(0, 5, transforms) - .bind_buffer(0, 6, draw_indexed_cmd) - .bind_buffer(0, 7, reordered_indices) - .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) - .dispatch_indirect(cull_triangles_cmd); - - return std::make_tuple( - camera, - visible_meshlet_instances_indices, - meshlet_instances, - mesh_instances, - meshes, - transforms, - draw_indexed_cmd, - reordered_indices - ); - } ); - auto draw_command_buffer = transfer_man.scratch_buffer({ .instanceCount = 1 }); - auto reordered_indices_buffer = std::move(frame.reordered_indices_buffer); - - std::tie( - camera_buffer, - visible_meshlet_instances_indices_buffer, - meshlet_instances_buffer, - mesh_instances_buffer, + draw_visbuffer( + bindless_descriptor_set, + depth_attachment, + visbuffer_attachment, + overdraw_attachment, + early_draw_command_buffer, + early_reordered_indices_buffer, meshes_buffer, + mesh_instances_buffer, + meshlet_instances_buffer, transforms_buffer, - draw_command_buffer, - reordered_indices_buffer - ) = - vis_cull_triangles_pass( - std::move(cull_triangles_cmd_buffer), - std::move(camera_buffer), - std::move(visible_meshlet_instances_indices_buffer), - std::move(meshlet_instances_buffer), - std::move(mesh_instances_buffer), - std::move(meshes_buffer), - std::move(transforms_buffer), - std::move(draw_command_buffer), - std::move(reordered_indices_buffer) - ); - - // ── VISBUFFER CLEAR ───────────────────────────────────────────────── - auto vis_clear_pass = vuk::make_pass( - "vis clear", - [](vuk::CommandBuffer &cmd_list, // - VUK_IA(vuk::eComputeWrite) visbuffer, - VUK_IA(vuk::eComputeWrite) overdraw) { - cmd_list // - .bind_compute_pipeline("passes.visbuffer_clear") - .bind_image(0, 0, visbuffer) - .bind_image(0, 1, overdraw) - .push_constants( - vuk::ShaderStageFlagBits::eCompute, - 0, - 
PushConstants(glm::uvec2(visbuffer->extent.width, visbuffer->extent.height)) - ) - .dispatch_invocations_per_pixel(visbuffer); - - return std::make_tuple(visbuffer, overdraw); - } - ); - - auto visbuffer_attachment = vuk::declare_ia( - "visbuffer", - { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eColorAttachment, - .format = vuk::Format::eR32Uint, - .sample_count = vuk::Samples::e1 } - ); - visbuffer_attachment.same_shape_as(final_attachment); - - auto overdraw_attachment = vuk::declare_ia( - "overdraw", - { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eColorAttachment, - .format = vuk::Format::eR32Uint, - .sample_count = vuk::Samples::e1 } + materials_buffer, + camera_buffer ); - overdraw_attachment.same_shape_as(final_attachment); - std::tie(visbuffer_attachment, overdraw_attachment) = vis_clear_pass(std::move(visbuffer_attachment), std::move(overdraw_attachment)); + draw_hiz(hiz_attachment, depth_attachment); - // ── VISBUFFER ENCODE ──────────────────────────────────────────────── - auto vis_encode_pass = vuk::make_pass( - "vis encode", - [descriptor_set = &bindless_descriptor_set]( - vuk::CommandBuffer &cmd_list, - VUK_BA(vuk::eIndirectRead) triangle_indirect, - VUK_BA(vuk::eIndexRead) index_buffer, - VUK_BA(vuk::eVertexRead) camera, - VUK_BA(vuk::eVertexRead) meshlet_instances, - VUK_BA(vuk::eVertexRead) mesh_instances, - VUK_BA(vuk::eVertexRead) meshes, - VUK_BA(vuk::eVertexRead) transforms, - VUK_BA(vuk::eFragmentRead) materials, - VUK_IA(vuk::eColorRW) visbuffer, - VUK_IA(vuk::eDepthStencilRW) depth, - VUK_IA(vuk::eFragmentRW) overdraw - ) { - cmd_list // - .bind_graphics_pipeline("passes.visbuffer_encode") - .set_rasterization({ .cullMode = vuk::CullModeFlagBits::eBack }) - .set_depth_stencil({ .depthTestEnable = true, .depthWriteEnable = true, .depthCompareOp = vuk::CompareOp::eGreaterOrEqual }) - .set_color_blend(visbuffer, vuk::BlendPreset::eOff) - 
.set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor) - .set_viewport(0, vuk::Rect2D::framebuffer()) - .set_scissor(0, vuk::Rect2D::framebuffer()) - .bind_persistent(1, *descriptor_set) - .bind_buffer(0, 0, camera) - .bind_buffer(0, 1, meshlet_instances) - .bind_buffer(0, 2, mesh_instances) - .bind_buffer(0, 3, meshes) - .bind_buffer(0, 4, transforms) - .bind_buffer(0, 5, materials) - .bind_image(0, 6, overdraw) - .bind_index_buffer(index_buffer, vuk::IndexType::eUint32) - .draw_indexed_indirect(1, triangle_indirect); - - return std::make_tuple(camera, meshlet_instances, mesh_instances, meshes, transforms, materials, visbuffer, depth, overdraw); - } + auto [late_draw_command_buffer, late_reordered_indices_buffer] = cull_scene( + true, + frame.max_meshlet_instance_count, + info.cull_flags, + transfer_man, + hiz_attachment, + cull_meshlets_cmd_buffer, + visible_meshlet_instances_count_buffer, + meshlet_visibility_mask_buffer, + meshes_buffer, + mesh_instances_buffer, + meshlet_instances_buffer, + transforms_buffer, + camera_buffer, + debug_drawer_buffer ); - std::tie( - camera_buffer, - meshlet_instances_buffer, - mesh_instances_buffer, + draw_visbuffer( + bindless_descriptor_set, + depth_attachment, + visbuffer_attachment, + overdraw_attachment, + late_draw_command_buffer, + late_reordered_indices_buffer, meshes_buffer, + mesh_instances_buffer, + meshlet_instances_buffer, transforms_buffer, materials_buffer, - visbuffer_attachment, - depth_attachment, - overdraw_attachment - ) = - vis_encode_pass( - std::move(draw_command_buffer), - std::move(reordered_indices_buffer), - std::move(camera_buffer), - std::move(meshlet_instances_buffer), - std::move(mesh_instances_buffer), - std::move(meshes_buffer), - std::move(transforms_buffer), - std::move(materials_buffer), - std::move(visbuffer_attachment), - std::move(depth_attachment), - std::move(overdraw_attachment) - ); + camera_buffer + ); // ── EDITOR MOUSE PICKING 
──────────────────────────────────────────── if (info.picking_texel) { @@ -1044,73 +1243,6 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valueextent; - auto mip_count = dst->level_count; - LS_EXPECT(mip_count < 13); - - auto dispatch_x = (extent.width + 63) >> 6; - auto dispatch_y = (extent.height + 63) >> 6; - - cmd_list // - .bind_compute_pipeline("passes.hiz") - .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mip_count, (dispatch_x * dispatch_y) - 1, glm::mat2(1.0f))) - .specialize_constants(0, extent.width == extent.height && (extent.width & (extent.width - 1)) == 0 ? 1u : 0u) - .specialize_constants(1, extent.width) - .specialize_constants(2, extent.height); - - *cmd_list.scratch_buffer(0, 0) = 0; - cmd_list.bind_sampler(0, 1, hiz_sampler_info); - cmd_list.bind_image(0, 2, src); - - for (u32 i = 0; i < 13; i++) { - cmd_list.bind_image(0, i + 3, dst->mip(ls::min(i, mip_count - 1_u32))); - } - - cmd_list.dispatch(dispatch_x, dispatch_y); - - return std::make_tuple(src, dst); - } - ); - - auto hiz_generate_slow_pass = vuk::make_pass( - "hiz generate slow", - [](vuk::CommandBuffer &cmd_list, // - VUK_IA(vuk::eComputeSampled) src, - VUK_IA(vuk::eComputeRW) dst) { - auto extent = dst->extent; - auto mip_count = dst->level_count; - - cmd_list // - .bind_compute_pipeline("passes.hiz_slow") - .bind_sampler(0, 0, hiz_sampler_info); - - for (auto i = 0_u32; i < mip_count; i++) { - auto mip_width = std::max(1_u32, extent.width >> i); - auto mip_height = std::max(1_u32, extent.height >> i); - - auto mip = dst->mip(i); - if (i == 0) { - cmd_list.bind_image(0, 1, src); - } else { - cmd_list.bind_image(0, 1, dst->mip(i - 1)); - } - - cmd_list.bind_image(0, 2, mip); - cmd_list.push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mip_width, mip_height)); - cmd_list.dispatch_invocations(mip_width, mip_height); - } - - return std::make_tuple(src, dst); - } - ); - - std::tie(depth_attachment, hiz_attachment) = 
hiz_generate_pass(std::move(depth_attachment), std::move(hiz_attachment)); - // ── VISBUFFER DECODE ──────────────────────────────────────────────── auto vis_decode_pass = vuk::make_pass( "vis decode", @@ -1480,6 +1612,11 @@ auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { self.meshes_buffer = {}; } + if (self.meshlet_instance_visibility_mask_buffer) { + device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); + self.meshlet_instance_visibility_mask_buffer = {}; + } + if (self.materials_buffer) { device.destroy(self.materials_buffer.id()); self.materials_buffer = {}; diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 59748e8a..a6280e4e 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -25,13 +25,13 @@ struct FramePrepareInfo { struct PreparedFrame { u32 mesh_instance_count = 0; + u32 max_meshlet_instance_count = 0; GPU::EnvironmentFlags environment_flags = GPU::EnvironmentFlags::None; vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = {}; vuk::Value mesh_instances_buffer = {}; vuk::Value meshlet_instances_buffer = {}; - vuk::Value visible_meshlet_instances_indices_buffer = {}; - vuk::Value reordered_indices_buffer = {}; + vuk::Value meshlet_instance_visibility_mask_buffer = {}; vuk::Value materials_buffer = {}; vuk::Value environment_buffer = {}; vuk::Value camera_buffer = {}; @@ -56,6 +56,7 @@ struct SceneRenderer { Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; + Buffer meshlet_instance_visibility_mask_buffer = {}; Buffer materials_buffer = {}; diff --git a/Lorr/Engine/Window/Window.cc b/Lorr/Engine/Window/Window.cc index 6cfa1bb5..673eabd3 100644 --- a/Lorr/Engine/Window/Window.cc +++ b/Lorr/Engine/Window/Window.cc @@ -194,8 +194,10 @@ auto Window::check_key_state(this Window &self, SDL_Scancode scancode, KeyState auto Window::set_relative_mouse(this Window &self, bool enabled) -> void { ZoneScoped; - auto center_rect = SDL_Rect{ 
.x = self.width / 2, .y = self.height / 2, .w = 1, .h = 1 }; - SDL_SetWindowMouseRect(self.handle, &center_rect); + + auto warp_rect = SDL_Rect{ .x = self.width / 2, .y = self.height / 2, .w = 1, .h = 1 }; + SDL_SetWindowMouseRect(self.handle, enabled ? &warp_rect : nullptr); + SDL_SetWindowRelativeMouseMode(self.handle, enabled); } diff --git a/xmake/packages.lua b/xmake/packages.lua index 5ab82d17..1e0acce2 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -59,7 +59,7 @@ add_requires("vuk 2025.07.09", { configs = { disable_exceptions = false, }, debug = is_mode("debug") }) -add_requires("meshoptimizer v0.24") +add_requires("meshoptimizer v0.25") add_requires("ktx v4.4.0", { debug = true }) add_requires("svector v1.0.3") From ebb61ddf47ad7a7275efa363be9aed930a376d63 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sat, 23 Aug 2025 20:01:47 +0300 Subject: [PATCH 12/27] mesh level two pass occlusion culling --- Lorr/Engine/Resources/shaders/cull.slang | 8 +- .../shaders/passes/cull_meshes.slang | 108 ++++++ .../shaders/passes/cull_meshlets.slang | 48 +-- .../Resources/shaders/passes/hiz_slow.slang | 5 +- .../shaders/passes/select_lods.slang | 82 ----- Lorr/Engine/Scene/SceneRenderer.cc | 328 ++++++++++++------ Lorr/Engine/Scene/SceneRenderer.hh | 2 + 7 files changed, 349 insertions(+), 232 deletions(-) create mode 100644 Lorr/Engine/Resources/shaders/passes/cull_meshes.slang delete mode 100644 Lorr/Engine/Resources/shaders/passes/select_lods.slang diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index 315f3a09..94db724e 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -101,13 +101,7 @@ public func test_occlusion( let size = max_texel - min_texel + 1; let max_size = max(size.x, size.y); - var mip = firstbithigh(max_size - 1); - let smin = min_texel >> mip; - let smax = max_texel >> mip; - if (any(smax - smin > 1)) { - mip += 
1; - } - + var mip = max(0.0, ceil(log2(max_size))); var uv = (min_uv + max_uv) * 0.5; let d = hiz_image.sample_mip(hiz_sampler, uv, mip); return screen_aabb.max.z <= d; diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang new file mode 100644 index 00000000..0bb16313 --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -0,0 +1,108 @@ +import std; +import gpu; +import scene; +import cull; +import debug_drawer; + +[[vk::constant_id(0)]] const u32 LATE = 0; +[[vk::binding(0)]] ConstantBuffer camera; +[[vk::binding(1)]] StructuredBuffer meshes; +[[vk::binding(2)]] StructuredBuffer transforms; +[[vk::binding(3)]] Image2D hiz_image; +[[vk::binding(4)]] Sampler hiz_sampler; +[[vk::binding(5)]] RWStructuredBuffer mesh_instances; +[[vk::binding(6)]] RWStructuredBuffer meshlet_instances; +[[vk::binding(7)]] RWStructuredBuffer mesh_visibility_mask; +[[vk::binding(8)]] RWStructuredBuffer meshlet_instances_count; +[[vk::binding(9)]] RWStructuredBuffer debug_drawer; + +#ifndef CULLING_MESHES_COUNT + #define CULLING_MESHES_COUNT 64 +#endif + +[[shader("compute")]] +[[numthreads(CULLING_MESHES_COUNT, 1, 1)]] +func cs_main( + uint3 thread_id : SV_DispatchThreadID, + uniform u32 mesh_instances_count, + uniform CullFlags cull_flags +) -> void { + let mesh_instance_index = thread_id.x; + if (mesh_instance_index >= mesh_instances_count) { + return; + } + + let was_visible = mesh_visibility_mask[mesh_instance_index] == 1; + + let mesh_instance = &mesh_instances[mesh_instance_index]; + let mesh = meshes[mesh_instance.mesh_index]; + let transform = transforms[mesh_instance.transform_index]; + let mvp = mul(camera.projection_view_mat, transform.world); + + var visible = (LATE == 1) ? 
true : was_visible; + if (visible) { + visible = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); + } + + if (LATE == 1 && visible && (cull_flags & CullFlags::Occlusion)) { + if (let screen_aabb = project_aabb(mvp, camera.near_clip, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler); + if (visible && true) { + let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; + let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; + var debug_rect = DebugRect(); + debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); + debug_rect.extent = ndc_aabb_max - ndc_aabb_min; + debug_rect.color = f32x3(1.0, 0.0, 0.0); + debug_rect.coord = DebugDrawCoord::NDC; + debug_draw_rect(debug_drawer[0], debug_rect); + } + } + } + + if (visible && (LATE == 0 || !was_visible)) { + var lod_index = 0; +#if 1 + // Credits: + // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 + let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; + let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; + let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; + let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; + let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); + let aabb_rough_camera_distance = max(length(aabb_center - camera.position) - 0.5 * aabb_rough_extent, 0.0); + + // Avoiding the atan here + let rough_resolution = max(camera.resolution.x, camera.resolution.y); + let fov90_distance_to_screen_ratio = 2.0f; + let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; + let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); + let rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; + + for (var i = 1; i < mesh.lod_count; i++) { + let mesh_lod = mesh.lods[i]; + let 
rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; + if (rough_pixel_error < camera.acceptable_lod_error) { + lod_index = i; + } else { + break; + } + } +#endif + + mesh_instance.lod_index = lod_index; + let mesh_lod = mesh.lods[lod_index]; + let meshlet_instance_offset = std::atomic_add(meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { + let offset = meshlet_instance_offset + i; + var meshlet_instance = MeshletInstance(); + meshlet_instance.mesh_instance_index = mesh_instance_index; + meshlet_instance.meshlet_index = i; + meshlet_instances[offset] = meshlet_instance; + } + } + + if (LATE == 1) { + mesh_visibility_mask[mesh_instance_index] = visible ? 1 : 0; + } +} diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index d55e325b..28ff8ac6 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -24,10 +24,6 @@ import debug_drawer; #define CULLING_MESHLET_COUNT 64 #endif -// TODO: Replace this when vuk can support bool constants -constexpr static bool IS_EARLY = LATE == 0; -constexpr static bool IS_LATE = LATE == 1; - [[shader("compute")]] [[numthreads(CULLING_MESHLET_COUNT, 1, 1)]] func cs_main( @@ -40,6 +36,11 @@ func cs_main( return; } + let mask_index = meshlet_instance_index / 32; + let bit_index = meshlet_instance_index - mask_index * 32; + let visibility_bit = 1 << bit_index; + let was_visible = (meshlet_visibility_mask[mask_index] & visibility_bit) != 0; + let meshlet_instance = meshlet_instances[meshlet_instance_index]; let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; let mesh = meshes[mesh_instance.mesh_index]; @@ -48,41 +49,18 @@ func cs_main( let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; var visible = true; - - let mask_index = meshlet_instance_index / 32; - 
let bit_index = meshlet_instance_index - mask_index * 32; - let visibility_bit = 1 << bit_index; - let was_visible = bool(meshlet_visibility_mask[mask_index] & visibility_bit); - if (visible && (cull_flags & CullFlags::MeshletFrustum)) { - let cur_mvp = mul(camera.projection_view_mat, transform.world); - visible = test_frustum(cur_mvp, bounds.aabb_center, bounds.aabb_extent); + let mvp = mul(camera.projection_view_mat, transform.world); + visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); } - if (IS_LATE && visible && (cull_flags & CullFlags::Occlusion)) { - let prev_mvp = mul(camera.frustum_projection_view_mat, transform.world); - if (let screen_aabb = project_aabb(prev_mvp, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler); - if (visible && true) { - let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; - let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; - var debug_rect = DebugRect(); - debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); - debug_rect.extent = ndc_aabb_max - ndc_aabb_min; - debug_rect.color = f32x3(1.0, 0.0, 0.0); - debug_rect.coord = DebugDrawCoord::NDC; - debug_draw_rect(debug_drawer[0], debug_rect); - } - } - } - - if (IS_LATE && (cull_flags & CullFlags::Occlusion)) { - if (visible) { - std::atomic_or(meshlet_visibility_mask[mask_index], visibility_bit, std::memory_order_relaxed); - } else { - std::atomic_and(meshlet_visibility_mask[mask_index], ~visibility_bit, std::memory_order_relaxed); - } + /* + if (visible) { + std::atomic_or(meshlet_visibility_mask[mask_index], visibility_bit, std::memory_order_relaxed); + } else { + std::atomic_and(meshlet_visibility_mask[mask_index], ~visibility_bit, std::memory_order_relaxed); } + */ if (visible) { let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); diff --git a/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang 
b/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang index efafd6cc..3af00d0f 100644 --- a/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang +++ b/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang @@ -14,8 +14,9 @@ struct ShaderParameters { func cs_main( u32x2 thread_id : SV_DispatchThreadID, uniform ParameterBlock params, - uniform u32x2 src_image_size + uniform u32x2 src_image_size, + uniform u32 mip_index ) -> void { - let c = params.src_image.sample(params.sampler, (f32x2(thread_id) + 0.5) / f32x2(src_image_size)).r; + let c = params.src_image.sample_mip(params.sampler, (f32x2(thread_id) + 0.5) / f32x2(src_image_size), mip_index).r; params.dst_mip.store(thread_id.xy, c); } diff --git a/Lorr/Engine/Resources/shaders/passes/select_lods.slang b/Lorr/Engine/Resources/shaders/passes/select_lods.slang deleted file mode 100644 index f5d9cce4..00000000 --- a/Lorr/Engine/Resources/shaders/passes/select_lods.slang +++ /dev/null @@ -1,82 +0,0 @@ -import std; -import gpu; -import scene; -import cull; -import debug_drawer; - -struct ShaderParameters { - ConstantBuffer camera; - StructuredBuffer meshes; - StructuredBuffer transforms; - - RWStructuredBuffer mesh_instances; - RWStructuredBuffer meshlet_instances; - RWStructuredBuffer meshlet_instances_count; - RWStructuredBuffer debug_drawer; -}; - -#ifndef CULLING_MESHES_COUNT - #define CULLING_MESHES_COUNT 64 -#endif - -[[shader("compute")]] -[[numthreads(CULLING_MESHES_COUNT, 1, 1)]] -func cs_main( - uint3 thread_id : SV_DispatchThreadID, - uniform ParameterBlock params, - uniform u32 mesh_instances_count, - uniform CullFlags cull_flags -) -> void { - let mesh_instance_index = thread_id.x; - if (mesh_instance_index >= mesh_instances_count) { - return; - } - - let mesh_instance = &params.mesh_instances[mesh_instance_index]; - let mesh = params.meshes[mesh_instance.mesh_index]; - let transform = params.transforms[mesh_instance.transform_index]; - let mvp = mul(params.camera.projection_view_mat, transform.world); - if 
(!test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { - return; - } - - var lod_index = 0; - if (true) { - // Credits: - // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 - let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; - let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; - let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; - let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; - let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); - let aabb_rough_camera_distance = max(length(aabb_center - params.camera.position) - 0.5 * aabb_rough_extent, 0.0); - - // Avoiding the atan here - let rough_resolution = max(params.camera.resolution.x, params.camera.resolution.y); - let fov90_distance_to_screen_ratio = 2.0f; - let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; - let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); - let rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; - - for (var i = 1; i < mesh.lod_count; i++) { - let mesh_lod = mesh.lods[i]; - let rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; - if (rough_pixel_error < params.camera.acceptable_lod_error) { - lod_index = i; - } else { - break; - } - } - } - - mesh_instance.lod_index = lod_index; - let mesh_lod = mesh.lods[lod_index]; - let meshlet_instance_offset = std::atomic_add(params.meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); - for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { - let offset = meshlet_instance_offset + i; - var meshlet_instance = MeshletInstance(); - meshlet_instance.mesh_instance_index = mesh_instance_index; - meshlet_instance.meshlet_index = i; - params.meshlet_instances[offset] = meshlet_instance; - } -} diff --git a/Lorr/Engine/Scene/SceneRenderer.cc 
b/Lorr/Engine/Scene/SceneRenderer.cc index 759849e6..767b4755 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -114,11 +114,11 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { }; Pipeline::create(device, default_slang_session, generate_cull_commands_pipeline_info).value(); - auto vis_select_lods_pipeline_info = PipelineCompileInfo{ - .module_name = "passes.select_lods", + auto vis_cull_meshes_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.cull_meshes", .entry_points = { "cs_main" }, }; - Pipeline::create(device, default_slang_session, vis_select_lods_pipeline_info).value(); + Pipeline::create(device, default_slang_session, vis_cull_meshes_pipeline_info).value(); auto vis_cull_meshlets_pipeline_info = PipelineCompileInfo{ .module_name = "passes.cull_meshlets", @@ -328,8 +328,23 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in if (!info.gpu_mesh_instances.empty()) { self.mesh_instances_buffer = self.mesh_instances_buffer.resize(device, info.gpu_mesh_instances.size_bytes()).value(); prepared_frame.mesh_instances_buffer = transfer_man.upload_staging(info.gpu_mesh_instances, self.mesh_instances_buffer); + + auto mesh_instance_visibility_mask_size_bytes = info.mesh_instance_count * sizeof(u32); + if (mesh_instance_visibility_mask_size_bytes > self.mesh_instance_visibility_mask_buffer.data_size()) { + self.mesh_instance_visibility_mask_buffer = + self.mesh_instance_visibility_mask_buffer.resize(device, mesh_instance_visibility_mask_size_bytes).value(); + prepared_frame.mesh_instance_visibility_mask_buffer = + self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eNone); + prepared_frame.mesh_instance_visibility_mask_buffer = zero_fill_pass(std::move(prepared_frame.mesh_instance_visibility_mask_buffer)); + } else { + prepared_frame.mesh_instance_visibility_mask_buffer = + 
self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eMemoryRead); + } + } else if (self.mesh_instances_buffer) { prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eMemoryRead); + prepared_frame.mesh_instance_visibility_mask_buffer = + self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eMemoryRead); } if (info.max_meshlet_instance_count > 0) { @@ -349,8 +364,10 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in self.meshlet_instance_visibility_mask_buffer.acquire(device, "meshlet instance visibility mask", vuk::eMemoryRead); } } else { - device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); - self.meshlet_instance_visibility_mask_buffer = {}; + if (self.meshlet_instance_visibility_mask_buffer) { + device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); + self.meshlet_instance_visibility_mask_buffer = {}; + } } info.environment.transmittance_lut_size = self.sky_transmittance_lut_view.extent(); @@ -421,22 +438,129 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in return prepared_frame; } -static auto cull_scene( +static auto cull_meshes( + bool late, + GPU::CullFlags cull_flags, + u32 mesh_instance_count, + TransferManager &transfer_man, + vuk::Value &hiz_attachment, + vuk::Value &meshes_buffer, + vuk::Value &mesh_instances_buffer, + vuk::Value &meshlet_instances_buffer, + vuk::Value &visible_meshlet_instances_count_buffer, + vuk::Value &mesh_visibility_mask_buffer, + vuk::Value &transforms_buffer, + vuk::Value &camera_buffer, + vuk::Value &debug_drawer_buffer +) -> vuk::Value { + ZoneScoped; + + auto vis_cull_meshes_pass = vuk::make_pass( + "vis cull meshes", + [mesh_instance_count, cull_flags, late]( + vuk::CommandBuffer &cmd_list, + VUK_BA(vuk::eComputeRead) camera, + VUK_BA(vuk::eComputeRead) meshes, + VUK_BA(vuk::eComputeRead) 
transforms, + VUK_IA(vuk::eComputeSampled) hiz, + VUK_BA(vuk::eComputeRW) mesh_instances, + VUK_BA(vuk::eComputeRW) meshlet_instances, + VUK_BA(vuk::eComputeRW) mesh_visibility_mask, + VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) debug_drawer + ) { + cmd_list // + .bind_compute_pipeline("passes.cull_meshes") + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, meshes) + .bind_buffer(0, 2, transforms) + .bind_image(0, 3, hiz) + .bind_sampler(0, 4, hiz_sampler_info) + .bind_buffer(0, 5, mesh_instances) + .bind_buffer(0, 6, meshlet_instances) + .bind_buffer(0, 7, mesh_visibility_mask) + .bind_buffer(0, 8, visible_meshlet_instances_count) + .bind_buffer(0, 9, debug_drawer) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) + .specialize_constants(0, late ? 1 : 0) + .dispatch_invocations(mesh_instance_count); + + return std::make_tuple( + camera, + meshes, + transforms, + hiz, + mesh_instances, + meshlet_instances, + mesh_visibility_mask, + visible_meshlet_instances_count, + debug_drawer + ); + } + ); + + std::tie( + camera_buffer, + meshes_buffer, + transforms_buffer, + hiz_attachment, + mesh_instances_buffer, + meshlet_instances_buffer, + mesh_visibility_mask_buffer, + visible_meshlet_instances_count_buffer, + debug_drawer_buffer + ) = + vis_cull_meshes_pass( + std::move(camera_buffer), + std::move(meshes_buffer), + std::move(transforms_buffer), + std::move(hiz_attachment), + std::move(mesh_instances_buffer), + std::move(meshlet_instances_buffer), + std::move(mesh_visibility_mask_buffer), + std::move(visible_meshlet_instances_count_buffer), + std::move(debug_drawer_buffer) + ); + + auto generate_cull_commands_pass = vuk::make_pass( + "generate cull commands", + [](vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) cull_meshlets_cmd) { + cmd_list // + .bind_compute_pipeline("passes.generate_cull_commands") + 
.bind_buffer(0, 0, visible_meshlet_instances_count) + .bind_buffer(0, 1, cull_meshlets_cmd) + .dispatch(1); + + return std::make_tuple(visible_meshlet_instances_count, cull_meshlets_cmd); + } + ); + + auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); + std::tie(visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = + generate_cull_commands_pass(std::move(visible_meshlet_instances_count_buffer), std::move(cull_meshlets_cmd_buffer)); + + return cull_meshlets_cmd_buffer; +} + +static auto cull_meshlets( bool late_pass, - u32 max_meshlet_instance_count, GPU::CullFlags cull_flags, TransferManager &transfer_man, vuk::Value &hiz_attachment, vuk::Value &cull_meshlets_cmd_buffer, vuk::Value &visible_meshlet_instances_count_buffer, vuk::Value &meshlet_instance_visibility_mask_buffer, + vuk::Value &visible_meshlet_instances_indices_buffer, + vuk::Value &reordered_indices_buffer, vuk::Value &meshes_buffer, vuk::Value &mesh_instances_buffer, vuk::Value &meshlet_instances_buffer, vuk::Value &transforms_buffer, vuk::Value &camera_buffer, vuk::Value &debug_drawer_buffer -) -> std::tuple, vuk::Value> { +) -> vuk::Value { ZoneScoped; // ── CULL MESHLETS ─────────────────────────────────────────────────── @@ -450,7 +574,7 @@ static auto cull_scene( VUK_BA(vuk::eComputeRead) mesh_instances, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, - VUK_IA(vuk::eComputeRead) hiz, + VUK_IA(vuk::eComputeSampled) hiz, VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, VUK_BA(vuk::eComputeRW) meshlet_instance_visibility_mask, VUK_BA(vuk::eComputeRW) cull_triangles_cmd, @@ -493,8 +617,6 @@ static auto cull_scene( ); auto cull_triangles_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); - auto visible_meshlet_instances_indices_buffer = - transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, max_meshlet_instance_count * sizeof(u32)); std::tie( cull_meshlets_cmd_buffer, @@ -567,8 +689,6 @@ 
static auto cull_scene( ); auto draw_command_buffer = transfer_man.scratch_buffer({ .instanceCount = 1 }); - auto reordered_indices_buffer = - transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, max_meshlet_instance_count * Model::MAX_MESHLET_PRIMITIVES * 3 * sizeof(u32)); std::tie( camera_buffer, @@ -592,7 +712,7 @@ static auto cull_scene( std::move(reordered_indices_buffer) ); - return std::make_tuple(draw_command_buffer, reordered_indices_buffer); + return draw_command_buffer; } static auto draw_visbuffer( @@ -611,24 +731,6 @@ static auto draw_visbuffer( ) -> void { ZoneScoped; - auto vis_clear_pass = vuk::make_pass( - "vis clear", - [](vuk::CommandBuffer &cmd_list, // - VUK_IA(vuk::eComputeWrite) visbuffer, - VUK_IA(vuk::eComputeWrite) overdraw) { - cmd_list // - .bind_compute_pipeline("passes.visbuffer_clear") - .bind_image(0, 0, visbuffer) - .bind_image(0, 1, overdraw) - .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(glm::uvec2(visbuffer->extent.width, visbuffer->extent.height))) - .dispatch_invocations_per_pixel(visbuffer); - - return std::make_tuple(visbuffer, overdraw); - } - ); - - std::tie(visbuffer_attachment, overdraw_attachment) = vis_clear_pass(std::move(visbuffer_attachment), std::move(overdraw_attachment)); - auto vis_encode_pass = vuk::make_pass( "vis encode", [&descriptor_set]( @@ -664,11 +766,23 @@ static auto draw_visbuffer( .bind_index_buffer(index_buffer, vuk::IndexType::eUint32) .draw_indexed_indirect(1, triangle_indirect); - return std::make_tuple(camera, meshlet_instances, mesh_instances, meshes, transforms, materials, visbuffer, depth, overdraw); + return std::make_tuple( + index_buffer, + camera, + meshlet_instances, + mesh_instances, + meshes, + transforms, + materials, + visbuffer, + depth, + overdraw + ); } ); std::tie( + reordered_indices_buffer, camera_buffer, meshlet_instances_buffer, mesh_instances_buffer, @@ -754,7 +868,7 @@ static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Valu } 
cmd_list.bind_image(0, 2, mip); - cmd_list.push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mip_width, mip_height)); + cmd_list.push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mip_width, mip_height, i)); cmd_list.dispatch_invocations(mip_width, mip_height); } @@ -762,7 +876,7 @@ static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Valu } ); - std::tie(depth_attachment, hiz_attachment) = hiz_generate_pass(std::move(depth_attachment), std::move(hiz_attachment)); + std::tie(depth_attachment, hiz_attachment) = hiz_generate_slow_pass(std::move(depth_attachment), std::move(hiz_attachment)); } static auto draw_sky( @@ -1008,7 +1122,8 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valueextent.width, visbuffer->extent.height)) + ) + .dispatch_invocations_per_pixel(visbuffer); + + return std::make_tuple(visbuffer, overdraw); + } + ); + + std::tie(visbuffer_attachment, overdraw_attachment) = vis_clear_pass(std::move(visbuffer_attachment), std::move(overdraw_attachment)); + auto transforms_buffer = std::move(frame.transforms_buffer); auto meshes_buffer = std::move(frame.meshes_buffer); auto mesh_instances_buffer = std::move(frame.mesh_instances_buffer); auto meshlet_instances_buffer = std::move(frame.meshlet_instances_buffer); auto materials_buffer = std::move(frame.materials_buffer); + auto mesh_visibility_mask_buffer = std::move(frame.mesh_instance_visibility_mask_buffer); auto meshlet_visibility_mask_buffer = std::move(frame.meshlet_instance_visibility_mask_buffer); - // ── CULL MESHES ───────────────────────────────────────────────────── - auto vis_select_lods_pass = vuk::make_pass( - "vis select lods", - [mesh_instance_count = frame.mesh_instance_count, cull_flags = info.cull_flags]( - vuk::CommandBuffer &cmd_list, - VUK_BA(vuk::eComputeRead) camera, - VUK_BA(vuk::eComputeRead) meshes, - VUK_BA(vuk::eComputeRead) transforms, - VUK_BA(vuk::eComputeRW) mesh_instances, - VUK_BA(vuk::eComputeRW) meshlet_instances, - 
VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRW) debug_drawer - ) { - cmd_list // - .bind_compute_pipeline("passes.select_lods") - .bind_buffer(0, 0, camera) - .bind_buffer(0, 1, meshes) - .bind_buffer(0, 2, transforms) - .bind_buffer(0, 3, mesh_instances) - .bind_buffer(0, 4, meshlet_instances) - .bind_buffer(0, 5, visible_meshlet_instances_count) - .bind_buffer(0, 6, debug_drawer) - .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) - .dispatch_invocations(mesh_instance_count); - - return std::make_tuple(camera, meshes, transforms, mesh_instances, meshlet_instances, visible_meshlet_instances_count, debug_drawer); - } - ); - auto visible_meshlet_instances_count_buffer = transfer_man.scratch_buffer({ 0 }); + auto visible_meshlet_instances_indices_buffer = + transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, frame.max_meshlet_instance_count * sizeof(u32)); + auto reordered_indices_buffer = transfer_man.alloc_transient_buffer( + vuk::MemoryUsage::eGPUonly, + frame.max_meshlet_instance_count * Model::MAX_MESHLET_PRIMITIVES * 3 * sizeof(u32) + ); - std::tie( - camera_buffer, + auto early_cull_meshlets_cmd_buffer = cull_meshes( + false, // early + info.cull_flags, + frame.mesh_instance_count, + transfer_man, + hiz_attachment, meshes_buffer, - transforms_buffer, mesh_instances_buffer, meshlet_instances_buffer, visible_meshlet_instances_count_buffer, + mesh_visibility_mask_buffer, + transforms_buffer, + camera_buffer, debug_drawer_buffer - ) = - vis_select_lods_pass( - std::move(camera_buffer), - std::move(meshes_buffer), - std::move(transforms_buffer), - std::move(mesh_instances_buffer), - std::move(meshlet_instances_buffer), - std::move(visible_meshlet_instances_count_buffer), - std::move(debug_drawer_buffer) - ); - - auto generate_cull_commands_pass = vuk::make_pass( - "generate cull commands", - [](vuk::CommandBuffer &cmd_list, // - VUK_BA(vuk::eComputeRead) 
visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRW) cull_meshlets_cmd) { - cmd_list // - .bind_compute_pipeline("passes.generate_cull_commands") - .bind_buffer(0, 0, visible_meshlet_instances_count) - .bind_buffer(0, 1, cull_meshlets_cmd) - .dispatch(1); - - return std::make_tuple(visible_meshlet_instances_count, cull_meshlets_cmd); - } ); - auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); - std::tie(visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = - generate_cull_commands_pass(std::move(visible_meshlet_instances_count_buffer), std::move(cull_meshlets_cmd_buffer)); - - auto [early_draw_command_buffer, early_reordered_indices_buffer] = cull_scene( - false, - frame.max_meshlet_instance_count, + auto early_draw_command_buffer = cull_meshlets( + false, // early info.cull_flags, transfer_man, hiz_attachment, - cull_meshlets_cmd_buffer, + early_cull_meshlets_cmd_buffer, visible_meshlet_instances_count_buffer, meshlet_visibility_mask_buffer, + visible_meshlet_instances_indices_buffer, + reordered_indices_buffer, meshes_buffer, mesh_instances_buffer, meshlet_instances_buffer, @@ -1162,7 +1256,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value void { self.meshes_buffer = {}; } + if (self.mesh_instance_visibility_mask_buffer) { + device.destroy(self.mesh_instance_visibility_mask_buffer.id()); + self.mesh_instance_visibility_mask_buffer = {}; + } + if (self.meshlet_instance_visibility_mask_buffer) { device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); self.meshlet_instance_visibility_mask_buffer = {}; diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index a6280e4e..f006aaad 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -30,6 +30,7 @@ struct PreparedFrame { vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = {}; vuk::Value mesh_instances_buffer = {}; + vuk::Value 
mesh_instance_visibility_mask_buffer = {}; vuk::Value meshlet_instances_buffer = {}; vuk::Value meshlet_instance_visibility_mask_buffer = {}; vuk::Value materials_buffer = {}; @@ -56,6 +57,7 @@ struct SceneRenderer { Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; + Buffer mesh_instance_visibility_mask_buffer = {}; Buffer meshlet_instance_visibility_mask_buffer = {}; Buffer materials_buffer = {}; From bfa366c7225dba3f2d931a88efa014ad068fc5ee Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 24 Aug 2025 21:23:38 +0300 Subject: [PATCH 13/27] meshlet index offsetting --- Lorr/Editor/Window/ViewportWindow.cc | 4 +- Lorr/Editor/main.cc | 2 +- Lorr/Engine/Resources/shaders/cull.slang | 11 +- .../shaders/passes/cull_meshes.slang | 32 +++-- .../shaders/passes/cull_meshlets.slang | 80 ++++++++++--- .../passes/generate_cull_commands.slang | 20 ++-- Lorr/Engine/Resources/shaders/scene.slang | 4 +- Lorr/Engine/Scene/GPUScene.hh | 14 ++- Lorr/Engine/Scene/SceneRenderer.cc | 110 ++++++++++++------ 9 files changed, 194 insertions(+), 83 deletions(-) diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index 46e8cd08..c8518f91 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -151,10 +151,12 @@ static auto draw_tools(ViewportWindow &self) -> void { auto &cull_flags = reinterpret_cast(active_scene->get_cull_flags()); auto &scene_renderer = lr::App::mod(); + ImGui::CheckboxFlags("Cull Mesh Frustum", &cull_flags, std::to_underlying(lr::GPU::CullFlags::MeshFrustum)); + ImGui::CheckboxFlags("Cull Mesh Occlusion", &cull_flags, std::to_underlying(lr::GPU::CullFlags::MeshOcclusion)); ImGui::CheckboxFlags("Cull Meshlet Frustum", &cull_flags, std::to_underlying(lr::GPU::CullFlags::MeshletFrustum)); + ImGui::CheckboxFlags("Cull Meshlet Occlusion", &cull_flags, std::to_underlying(lr::GPU::CullFlags::MeshletOcclusion)); ImGui::CheckboxFlags("Cull Triangle Back 
Face", &cull_flags, std::to_underlying(lr::GPU::CullFlags::TriangleBackFace)); ImGui::CheckboxFlags("Cull Micro Triangles", &cull_flags, std::to_underlying(lr::GPU::CullFlags::MicroTriangles)); - ImGui::CheckboxFlags("Cull Occlusion", &cull_flags, std::to_underlying(lr::GPU::CullFlags::Occlusion)); ImGui::Checkbox("Debug Lines", &scene_renderer.debug_lines); } } diff --git a/Lorr/Editor/main.cc b/Lorr/Editor/main.cc index 6b2c9d43..d789abd5 100755 --- a/Lorr/Editor/main.cc +++ b/Lorr/Editor/main.cc @@ -18,7 +18,7 @@ i32 main(i32, c8 **) { }; lr::AppBuilder() // - .module(3) + .module(1) .module(window_info) .module() .module() diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index 94db724e..9097e046 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -88,7 +88,8 @@ public func test_frustum(in f32x4x4 mvp, in f32x3 aabb_center, in f32x3 aabb_ext public func test_occlusion( in ScreenAabb screen_aabb, in Image2D hiz_image, - in Sampler hiz_sampler + in Sampler hiz_sampler, + in constexpr bool ceiling ) -> bool { var hiz_size = u32x2(0.0); var hiz_levels = 0; @@ -101,7 +102,13 @@ public func test_occlusion( let size = max_texel - min_texel + 1; let max_size = max(size.x, size.y); - var mip = max(0.0, ceil(log2(max_size))); + var mip = 0.0; + if (ceiling) { + mip = max(0.0, ceil(log2(max_size))); + } else { + mip = floor(log2(max_size)); + } + var uv = (min_uv + max_uv) * 0.5; let d = hiz_image.sample_mip(hiz_sampler, uv, mip); return screen_aabb.max.z <= d; diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index 0bb16313..4fceb215 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -13,8 +13,9 @@ import debug_drawer; [[vk::binding(5)]] RWStructuredBuffer mesh_instances; [[vk::binding(6)]] RWStructuredBuffer 
meshlet_instances; [[vk::binding(7)]] RWStructuredBuffer mesh_visibility_mask; -[[vk::binding(8)]] RWStructuredBuffer meshlet_instances_count; -[[vk::binding(9)]] RWStructuredBuffer debug_drawer; +[[vk::binding(8)]] RWStructuredBuffer early_visible_meshlet_instances_count; +[[vk::binding(9)]] RWStructuredBuffer late_visible_meshlet_instances_count; +[[vk::binding(10)]] RWStructuredBuffer debug_drawer; #ifndef CULLING_MESHES_COUNT #define CULLING_MESHES_COUNT 64 @@ -39,21 +40,29 @@ func cs_main( let transform = transforms[mesh_instance.transform_index]; let mvp = mul(camera.projection_view_mat, transform.world); - var visible = (LATE == 1) ? true : was_visible; - if (visible) { + let cull_occlusion = (cull_flags & CullFlags::MeshOcclusion) != 0; + + var visible = true; + if (LATE == 0 && !was_visible) { + // During previous frame, if mesh was NOT visible, we don't render it. + visible = false; + } + + if (visible && (cull_flags & CullFlags::MeshFrustum)) { visible = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); } - if (LATE == 1 && visible && (cull_flags & CullFlags::Occlusion)) { + if (LATE == 1 && visible && cull_occlusion) { if (let screen_aabb = project_aabb(mvp, camera.near_clip, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler); + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, true); + if (visible && true) { let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; var debug_rect = DebugRect(); debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); debug_rect.extent = ndc_aabb_max - ndc_aabb_min; - debug_rect.color = f32x3(1.0, 0.0, 0.0); + debug_rect.color = f32x3(1.0, 0.0, 1.0); debug_rect.coord = DebugDrawCoord::NDC; debug_draw_rect(debug_drawer[0], debug_rect); } @@ -92,7 +101,14 @@ func cs_main( mesh_instance.lod_index = lod_index; let mesh_lod = mesh.lods[lod_index]; - let 
meshlet_instance_offset = std::atomic_add(meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + var meshlet_instance_offset = 0; + if (LATE == 0) { + meshlet_instance_offset = std::atomic_add(early_visible_meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + } else { + meshlet_instance_offset = std::atomic_add(late_visible_meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + meshlet_instance_offset += early_visible_meshlet_instances_count[0]; + } + for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { let offset = meshlet_instance_offset + i; var meshlet_instance = MeshletInstance(); diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index 28ff8ac6..7b150006 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -14,11 +14,12 @@ import debug_drawer; [[vk::binding(4)]] StructuredBuffer transforms; [[vk::binding(5)]] Image2D hiz_image; [[vk::binding(6)]] Sampler hiz_sampler; -[[vk::binding(7)]] StructuredBuffer visible_meshlet_instances_count; -[[vk::binding(8)]] RWStructuredBuffer meshlet_visibility_mask; -[[vk::binding(9)]] RWStructuredBuffer cull_triangles_cmd; -[[vk::binding(10)]] RWStructuredBuffer visible_meshlet_instances_indices; -[[vk::binding(11)]] RWStructuredBuffer debug_drawer; +[[vk::binding(7)]] StructuredBuffer early_visible_meshlet_instances_count; +[[vk::binding(8)]] StructuredBuffer late_visible_meshlet_instances_count; +[[vk::binding(9)]] RWStructuredBuffer meshlet_visibility_mask; +[[vk::binding(10)]] RWStructuredBuffer cull_triangles_cmd; +[[vk::binding(11)]] RWStructuredBuffer visible_meshlet_instances_indices; +[[vk::binding(12)]] RWStructuredBuffer debug_drawer; #ifndef CULLING_MESHLET_COUNT #define CULLING_MESHLET_COUNT 64 @@ -30,14 +31,19 @@ func cs_main( uint3 thread_id : SV_DispatchThreadID, uniform 
CullFlags cull_flags, ) -> void { - let meshlet_instance_count = visible_meshlet_instances_count[0]; - let meshlet_instance_index = thread_id.x; - if (meshlet_instance_index >= meshlet_instance_count) { + let meshlet_instance_count = (LATE == 0) ? early_visible_meshlet_instances_count[0] : late_visible_meshlet_instances_count[0]; + let local_meshlet_instance_index = thread_id.x; + if (local_meshlet_instance_index >= meshlet_instance_count) { return; } + var meshlet_instance_index = local_meshlet_instance_index; + if (LATE == 1) { + meshlet_instance_index += early_visible_meshlet_instances_count[0]; + } + let mask_index = meshlet_instance_index / 32; - let bit_index = meshlet_instance_index - mask_index * 32; + let bit_index = meshlet_instance_index - (mask_index * 32); let visibility_bit = 1 << bit_index; let was_visible = (meshlet_visibility_mask[mask_index] & visibility_bit) != 0; @@ -48,22 +54,64 @@ func cs_main( let mesh_lod = mesh.lods[mesh_instance.lod_index]; let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; + let cull_occlusion = (cull_flags & CullFlags::MeshletOcclusion) != 0; + let mvp = mul(camera.projection_view_mat, transform.world); + +#if 1 var visible = true; + if (LATE == 0 && !was_visible) { + // During previous frame, if meshlet was NOT visible, we don't render it. 
+ visible = false; + } + + var skip = false; + if (LATE == 1 && was_visible) { + skip = true; + } + if (visible && (cull_flags & CullFlags::MeshletFrustum)) { - let mvp = mul(camera.projection_view_mat, transform.world); visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); } - /* - if (visible) { - std::atomic_or(meshlet_visibility_mask[mask_index], visibility_bit, std::memory_order_relaxed); - } else { - std::atomic_and(meshlet_visibility_mask[mask_index], ~visibility_bit, std::memory_order_relaxed); + if (LATE == 1 && visible && cull_occlusion) { + if (let screen_aabb = project_aabb(mvp, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, false); + + if (visible && true) { + let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; + let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; + var debug_rect = DebugRect(); + debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); + debug_rect.extent = ndc_aabb_max - ndc_aabb_min; + debug_rect.color = f32x3(1.0, 0.0, 0.0); + debug_rect.coord = DebugDrawCoord::NDC; + debug_draw_rect(debug_drawer[0], debug_rect); + } + } + } + + if (LATE == 1 && cull_occlusion) { + if (visible) { + std::atomic_or(meshlet_visibility_mask[mask_index], visibility_bit, std::memory_order_acq_rel); + } else { + std::atomic_and(meshlet_visibility_mask[mask_index], ~visibility_bit, std::memory_order_acq_rel); + } + } + + if (visible && !skip) { + let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + visible_meshlet_instances_indices[index] = meshlet_instance_index; + } + +#else + var visible = true; + if (visible && (cull_flags & CullFlags::MeshletFrustum)) { + visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); } - */ if (visible) { let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); visible_meshlet_instances_indices[index] = meshlet_instance_index; } +#endif 
} diff --git a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang index 3495f0fe..f1b13fe8 100644 --- a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang +++ b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang @@ -1,17 +1,17 @@ import std; import gpu; -struct ShaderParameters { - StructuredBuffer meshlet_instances_count; - - RWStructuredBuffer cull_meshlets_cmd; -}; +[[vk::constant_id(0)]] const u32 LATE = 0; +[[vk::binding(0)]] StructuredBuffer early_visible_meshlet_instances_count; +[[vk::binding(1)]] StructuredBuffer late_visible_meshlet_instances_count; +[[vk::binding(2)]] RWStructuredBuffer cull_meshlets_cmd; [[shader("compute")]] [[numthreads(1, 1, 1)]] -func cs_main( - uniform ParameterBlock params -) -> void { - params.cull_meshlets_cmd[0].x = (params.meshlet_instances_count[0] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; +func cs_main() -> void { + if (LATE == 0) { + cull_meshlets_cmd[0].x = (early_visible_meshlet_instances_count[0] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; + } else { + cull_meshlets_cmd[0].x = (late_visible_meshlet_instances_count[0] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; + } } - diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 9a936be5..f68a65f0 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -22,10 +22,12 @@ public enum DebugView : i32 { [[Flags]] public enum CullFlags : u32 { + MeshFrustum, + MeshOcclusion, MeshletFrustum, + MeshletOcclusion, TriangleBackFace, MicroTriangles, - Occlusion, }; [[Flags]] diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 554bb063..72c76b4b 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -55,12 +55,14 @@ struct DebugDrawer { }; enum class CullFlags : u32 { - MeshletFrustum = 1 
<< 0, - TriangleBackFace = 1 << 1, - MicroTriangles = 1 << 2, - Occlusion = 1 << 3, - - All = MeshletFrustum | TriangleBackFace | MicroTriangles | Occlusion, + MeshFrustum = 1 << 0, + MeshOcclusion = 1 << 1, + MeshletFrustum = 1 << 2, + MeshletOcclusion = 1 << 3, + TriangleBackFace = 1 << 4, + MicroTriangles = 1 << 5, + + All = ~0_u32, }; enum EnvironmentFlags : u32 { diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 767b4755..d4b2dff5 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -5,6 +5,7 @@ #include "Engine/Core/App.hh" #include "Engine/Graphics/VulkanDevice.hh" +#include "Engine/Memory/Stack.hh" namespace lr { enum BindlessDescriptorLayout : u32 { @@ -447,16 +448,18 @@ static auto cull_meshes( vuk::Value &meshes_buffer, vuk::Value &mesh_instances_buffer, vuk::Value &meshlet_instances_buffer, - vuk::Value &visible_meshlet_instances_count_buffer, + vuk::Value &early_visible_meshlet_instances_count_buffer, + vuk::Value &late_visible_meshlet_instances_count_buffer, vuk::Value &mesh_visibility_mask_buffer, vuk::Value &transforms_buffer, vuk::Value &camera_buffer, vuk::Value &debug_drawer_buffer ) -> vuk::Value { ZoneScoped; + memory::ScopedStack stack; auto vis_cull_meshes_pass = vuk::make_pass( - "vis cull meshes", + stack.format("vis cull meshes {}", late ? 
"late" : "early"), [mesh_instance_count, cull_flags, late]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eComputeRead) camera, @@ -466,7 +469,8 @@ static auto cull_meshes( VUK_BA(vuk::eComputeRW) mesh_instances, VUK_BA(vuk::eComputeRW) meshlet_instances, VUK_BA(vuk::eComputeRW) mesh_visibility_mask, - VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) early_visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) late_visible_meshlet_instances_count, VUK_BA(vuk::eComputeRW) debug_drawer ) { cmd_list // @@ -479,8 +483,9 @@ static auto cull_meshes( .bind_buffer(0, 5, mesh_instances) .bind_buffer(0, 6, meshlet_instances) .bind_buffer(0, 7, mesh_visibility_mask) - .bind_buffer(0, 8, visible_meshlet_instances_count) - .bind_buffer(0, 9, debug_drawer) + .bind_buffer(0, 8, early_visible_meshlet_instances_count) + .bind_buffer(0, 9, late_visible_meshlet_instances_count) + .bind_buffer(0, 10, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) .specialize_constants(0, late ? 
1 : 0) .dispatch_invocations(mesh_instance_count); @@ -493,7 +498,8 @@ static auto cull_meshes( mesh_instances, meshlet_instances, mesh_visibility_mask, - visible_meshlet_instances_count, + early_visible_meshlet_instances_count, + late_visible_meshlet_instances_count, debug_drawer ); } @@ -507,7 +513,8 @@ static auto cull_meshes( mesh_instances_buffer, meshlet_instances_buffer, mesh_visibility_mask_buffer, - visible_meshlet_instances_count_buffer, + early_visible_meshlet_instances_count_buffer, + late_visible_meshlet_instances_count_buffer, debug_drawer_buffer ) = vis_cull_meshes_pass( @@ -518,39 +525,50 @@ static auto cull_meshes( std::move(mesh_instances_buffer), std::move(meshlet_instances_buffer), std::move(mesh_visibility_mask_buffer), - std::move(visible_meshlet_instances_count_buffer), + std::move(early_visible_meshlet_instances_count_buffer), + std::move(late_visible_meshlet_instances_count_buffer), std::move(debug_drawer_buffer) ); auto generate_cull_commands_pass = vuk::make_pass( - "generate cull commands", - [](vuk::CommandBuffer &cmd_list, // - VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRW) cull_meshlets_cmd) { + stack.format("generate cull commands {}", late ? "late" : "early"), + [late]( + vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::eComputeRead) early_visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRead) late_visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) cull_meshlets_cmd + ) { cmd_list // .bind_compute_pipeline("passes.generate_cull_commands") - .bind_buffer(0, 0, visible_meshlet_instances_count) - .bind_buffer(0, 1, cull_meshlets_cmd) + .bind_buffer(0, 0, early_visible_meshlet_instances_count) + .bind_buffer(0, 1, late_visible_meshlet_instances_count) + .bind_buffer(0, 2, cull_meshlets_cmd) + .specialize_constants(0, late ? 
1 : 0) .dispatch(1); - return std::make_tuple(visible_meshlet_instances_count, cull_meshlets_cmd); + return std::make_tuple(early_visible_meshlet_instances_count, late_visible_meshlet_instances_count, cull_meshlets_cmd); } ); auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); - std::tie(visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = - generate_cull_commands_pass(std::move(visible_meshlet_instances_count_buffer), std::move(cull_meshlets_cmd_buffer)); + std::tie(early_visible_meshlet_instances_count_buffer, late_visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = + generate_cull_commands_pass( + std::move(early_visible_meshlet_instances_count_buffer), + std::move(late_visible_meshlet_instances_count_buffer), + std::move(cull_meshlets_cmd_buffer) + ); return cull_meshlets_cmd_buffer; } static auto cull_meshlets( - bool late_pass, + bool late, GPU::CullFlags cull_flags, TransferManager &transfer_man, vuk::Value &hiz_attachment, vuk::Value &cull_meshlets_cmd_buffer, - vuk::Value &visible_meshlet_instances_count_buffer, + vuk::Value &early_visible_meshlet_instances_count_buffer, + vuk::Value &late_visible_meshlet_instances_count_buffer, vuk::Value &meshlet_instance_visibility_mask_buffer, vuk::Value &visible_meshlet_instances_indices_buffer, vuk::Value &reordered_indices_buffer, @@ -562,11 +580,12 @@ static auto cull_meshlets( vuk::Value &debug_drawer_buffer ) -> vuk::Value { ZoneScoped; + memory::ScopedStack stack; // ── CULL MESHLETS ─────────────────────────────────────────────────── auto vis_cull_meshlets_pass = vuk::make_pass( - "vis cull meshlets", - [late_pass, cull_flags]( + stack.format("vis cull meshlets {}", late ? 
"late" : "early"), + [late, cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) dispatch_cmd, VUK_BA(vuk::eComputeRead) camera, @@ -575,7 +594,8 @@ static auto cull_meshlets( VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, VUK_IA(vuk::eComputeSampled) hiz, - VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRead) early_visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRead) late_visible_meshlet_instances_count, VUK_BA(vuk::eComputeRW) meshlet_instance_visibility_mask, VUK_BA(vuk::eComputeRW) cull_triangles_cmd, VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, @@ -590,13 +610,14 @@ static auto cull_meshlets( .bind_buffer(0, 4, transforms) .bind_image(0, 5, hiz) .bind_sampler(0, 6, hiz_sampler_info) - .bind_buffer(0, 7, visible_meshlet_instances_count) - .bind_buffer(0, 8, meshlet_instance_visibility_mask) - .bind_buffer(0, 9, cull_triangles_cmd) - .bind_buffer(0, 10, visible_meshlet_instances_indices) - .bind_buffer(0, 11, debug_drawer) + .bind_buffer(0, 7, early_visible_meshlet_instances_count) + .bind_buffer(0, 8, late_visible_meshlet_instances_count) + .bind_buffer(0, 9, meshlet_instance_visibility_mask) + .bind_buffer(0, 10, cull_triangles_cmd) + .bind_buffer(0, 11, visible_meshlet_instances_indices) + .bind_buffer(0, 12, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) - .specialize_constants(0, late_pass ? 1 : 0) + .specialize_constants(0, late ? 
1 : 0) .dispatch_indirect(dispatch_cmd); return std::make_tuple( @@ -607,7 +628,8 @@ static auto cull_meshlets( meshes, transforms, hiz, - visible_meshlet_instances_count, + early_visible_meshlet_instances_count, + late_visible_meshlet_instances_count, meshlet_instance_visibility_mask, cull_triangles_cmd, visible_meshlet_instances_indices, @@ -626,7 +648,8 @@ static auto cull_meshlets( meshes_buffer, transforms_buffer, hiz_attachment, - visible_meshlet_instances_count_buffer, + early_visible_meshlet_instances_count_buffer, + late_visible_meshlet_instances_count_buffer, meshlet_instance_visibility_mask_buffer, cull_triangles_cmd_buffer, visible_meshlet_instances_indices_buffer, @@ -640,7 +663,8 @@ static auto cull_meshlets( std::move(meshes_buffer), std::move(transforms_buffer), std::move(hiz_attachment), - std::move(visible_meshlet_instances_count_buffer), + std::move(early_visible_meshlet_instances_count_buffer), + std::move(late_visible_meshlet_instances_count_buffer), std::move(meshlet_instance_visibility_mask_buffer), std::move(cull_triangles_cmd_buffer), std::move(visible_meshlet_instances_indices_buffer), @@ -649,7 +673,7 @@ static auto cull_meshlets( // ── CULL TRIANGLES ────────────────────────────────────────────────── auto vis_cull_triangles_pass = vuk::make_pass( - "vis cull triangles", + stack.format("vis cull triangles {}", late ? "late" : "early"), [cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, @@ -716,6 +740,7 @@ static auto cull_meshlets( } static auto draw_visbuffer( + bool late, vuk::PersistentDescriptorSet &descriptor_set, vuk::Value &depth_attachment, vuk::Value &visbuffer_attachment, @@ -730,9 +755,10 @@ static auto draw_visbuffer( vuk::Value &camera_buffer ) -> void { ZoneScoped; + memory::ScopedStack stack; auto vis_encode_pass = vuk::make_pass( - "vis encode", + stack.format("vis encode {}", late ? 
"late" : "early"), [&descriptor_set]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) triangle_indirect, @@ -1208,7 +1234,6 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value({ 0 }); auto visible_meshlet_instances_indices_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, frame.max_meshlet_instance_count * sizeof(u32)); auto reordered_indices_buffer = transfer_man.alloc_transient_buffer( @@ -1216,6 +1241,9 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value Date: Mon, 25 Aug 2025 14:24:57 +0300 Subject: [PATCH 14/27] debugging gpu crash --- .../shaders/passes/cull_meshes.slang | 2 +- .../shaders/passes/cull_meshlets.slang | 86 +++++++++---------- Lorr/Engine/Scene/Scene.cc | 20 ++--- 3 files changed, 50 insertions(+), 58 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index 4fceb215..af65694c 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -106,7 +106,7 @@ func cs_main( meshlet_instance_offset = std::atomic_add(early_visible_meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); } else { meshlet_instance_offset = std::atomic_add(late_visible_meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); - meshlet_instance_offset += early_visible_meshlet_instances_count[0]; + //meshlet_instance_offset += max(0, i32(early_visible_meshlet_instances_count[0]) - 1); } for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index 7b150006..24bab7fd 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -39,9 +39,10 @@ func cs_main( var meshlet_instance_index = local_meshlet_instance_index; if 
(LATE == 1) { - meshlet_instance_index += early_visible_meshlet_instances_count[0]; + //meshlet_instance_index += early_visible_meshlet_instances_count[0]; } +#if 0 let mask_index = meshlet_instance_index / 32; let bit_index = meshlet_instance_index - (mask_index * 32); let visibility_bit = 1 << bit_index; @@ -57,61 +58,52 @@ func cs_main( let cull_occlusion = (cull_flags & CullFlags::MeshletOcclusion) != 0; let mvp = mul(camera.projection_view_mat, transform.world); -#if 1 - var visible = true; - if (LATE == 0 && !was_visible) { - // During previous frame, if meshlet was NOT visible, we don't render it. - visible = false; - } - - var skip = false; - if (LATE == 1 && was_visible) { - skip = true; - } - - if (visible && (cull_flags & CullFlags::MeshletFrustum)) { - visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); - } + if (LATE == 0) { + var visible = was_visible; + if (visible) { + visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); + } - if (LATE == 1 && visible && cull_occlusion) { - if (let screen_aabb = project_aabb(mvp, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, false); + if (visible) { + let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + visible_meshlet_instances_indices[index] = meshlet_instance_index; + } + } else { + var visible = true; + if (visible) { + visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); + } - if (visible && true) { - let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; - let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; - var debug_rect = DebugRect(); - debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); - debug_rect.extent = ndc_aabb_max - ndc_aabb_min; - debug_rect.color = f32x3(1.0, 0.0, 0.0); - debug_rect.coord = DebugDrawCoord::NDC; - debug_draw_rect(debug_drawer[0], debug_rect); + if (visible && cull_occlusion) { + if (let 
screen_aabb = project_aabb(mvp, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, false); + + if (visible && true) { + let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; + let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; + var debug_rect = DebugRect(); + debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); + debug_rect.extent = ndc_aabb_max - ndc_aabb_min; + debug_rect.color = f32x3(1.0, 0.0, 0.0); + debug_rect.coord = DebugDrawCoord::NDC; + debug_draw_rect(debug_drawer[0], debug_rect); + } } } - } - if (LATE == 1 && cull_occlusion) { + if (visible && !was_visible) { + let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + visible_meshlet_instances_indices[index] = meshlet_instance_index; + } + if (visible) { - std::atomic_or(meshlet_visibility_mask[mask_index], visibility_bit, std::memory_order_acq_rel); + std::atomic_or(meshlet_visibility_mask[mask_index], visibility_bit, std::memory_order_relaxed); } else { - std::atomic_and(meshlet_visibility_mask[mask_index], ~visibility_bit, std::memory_order_acq_rel); + std::atomic_and(meshlet_visibility_mask[mask_index], ~visibility_bit, std::memory_order_relaxed); } } - - if (visible && !skip) { - let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); - visible_meshlet_instances_indices[index] = meshlet_instance_index; - } - #else - var visible = true; - if (visible && (cull_flags & CullFlags::MeshletFrustum)) { - visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); - } - - if (visible) { - let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); - visible_meshlet_instances_indices[index] = meshlet_instance_index; - } + let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + visible_meshlet_instances_indices[index] = meshlet_instance_index; #endif } diff --git 
a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index a2da98f5..99b9db46 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -720,16 +720,16 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< auto sampler_index = 0_u32; auto flags = GPU::MaterialFlag::None; - if (albedo_image_index.has_value()) { - auto *texture = asset_man.get_texture(material->albedo_texture); - sampler_index = texture->sampler.index(); - flags |= GPU::MaterialFlag::HasAlbedoImage; - } - - flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; - flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; - flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; - flags |= occlusion_image_index.has_value() ? GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; + // if (albedo_image_index.has_value()) { + // auto *texture = asset_man.get_texture(material->albedo_texture); + // sampler_index = texture->sampler.index(); + // flags |= GPU::MaterialFlag::HasAlbedoImage; + // } + // + // flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; + // flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; + // flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; + // flags |= occlusion_image_index.has_value() ? 
GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; gpu_material.albedo_color = material->albedo_color; gpu_material.emissive_color = material->emissive_color; From ac79ad2c43e56632d7bf559136788b92b3a097a0 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 26 Aug 2025 00:08:00 +0300 Subject: [PATCH 15/27] revert two pass occlusion --- Lorr/Engine/Graphics/Vulkan/Device.cc | 4 +- .../Engine/Graphics/Vulkan/TransferManager.cc | 10 +- .../shaders/passes/cull_meshes.slang | 122 +++------ .../shaders/passes/cull_meshlets.slang | 96 +++---- .../passes/generate_cull_commands.slang | 12 +- Lorr/Engine/Resources/shaders/scene.slang | 2 +- Lorr/Engine/Scene/SceneRenderer.cc | 242 ++++-------------- Lorr/Engine/Scene/SceneRenderer.hh | 4 - 8 files changed, 128 insertions(+), 364 deletions(-) diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 66608499..69a3930e 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -34,8 +34,8 @@ auto Device::init(this Device &self) -> bool { instance_builder.set_engine_version(1, 0, 0); instance_builder.enable_validation_layers(false); // use vkconfig ui... 
instance_builder.request_validation_layers(false); - // instance_builder.add_debug_messenger_severity(VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT); - // instance_builder.add_debug_messenger_type(VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT); + instance_builder.add_debug_messenger_severity(VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT); + instance_builder.add_debug_messenger_type(VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT); instance_builder.set_debug_callback( [](VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, diff --git a/Lorr/Engine/Graphics/Vulkan/TransferManager.cc b/Lorr/Engine/Graphics/Vulkan/TransferManager.cc index 30329c82..0cb9a45b 100644 --- a/Lorr/Engine/Graphics/Vulkan/TransferManager.cc +++ b/Lorr/Engine/Graphics/Vulkan/TransferManager.cc @@ -19,10 +19,12 @@ auto TransferManager::alloc_transient_buffer_raw(this TransferManager &self, vuk -> vuk::Buffer { ZoneScoped; - std::shared_lock _(self.mutex); - auto buffer = - *vuk::allocate_buffer(*self.device->allocator, { .mem_usage = usage, .size = size, .alignment = self.device->non_coherent_atom_size() }, LOC); - return *buffer; + auto read_lock = std::shared_lock(self.mutex); + auto buffer = vuk::Buffer{}; + auto buffer_info = vuk::BufferCreateInfo{ .mem_usage = usage, .size = size, .alignment = self.device->non_coherent_atom_size() }; + self.device->allocator->allocate_buffers(std::span{ &buffer, 1 }, std::span{ &buffer_info, 1 }, LOC); + + return buffer; } auto TransferManager::alloc_transient_buffer(this TransferManager &self, vuk::MemoryUsage usage, usize size, vuk::source_location LOC) diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index af65694c..662a5cc3 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -4,18 +4,13 @@ import scene; import cull; import debug_drawer; 
-[[vk::constant_id(0)]] const u32 LATE = 0; [[vk::binding(0)]] ConstantBuffer camera; [[vk::binding(1)]] StructuredBuffer meshes; [[vk::binding(2)]] StructuredBuffer transforms; -[[vk::binding(3)]] Image2D hiz_image; -[[vk::binding(4)]] Sampler hiz_sampler; -[[vk::binding(5)]] RWStructuredBuffer mesh_instances; -[[vk::binding(6)]] RWStructuredBuffer meshlet_instances; -[[vk::binding(7)]] RWStructuredBuffer mesh_visibility_mask; -[[vk::binding(8)]] RWStructuredBuffer early_visible_meshlet_instances_count; -[[vk::binding(9)]] RWStructuredBuffer late_visible_meshlet_instances_count; -[[vk::binding(10)]] RWStructuredBuffer debug_drawer; +[[vk::binding(3)]] RWStructuredBuffer mesh_instances; +[[vk::binding(4)]] RWStructuredBuffer meshlet_instances; +[[vk::binding(5)]] RWStructuredBuffer visible_meshlet_instances_count; +[[vk::binding(6)]] RWStructuredBuffer debug_drawer; #ifndef CULLING_MESHES_COUNT #define CULLING_MESHES_COUNT 64 @@ -33,92 +28,53 @@ func cs_main( return; } - let was_visible = mesh_visibility_mask[mesh_instance_index] == 1; - let mesh_instance = &mesh_instances[mesh_instance_index]; let mesh = meshes[mesh_instance.mesh_index]; let transform = transforms[mesh_instance.transform_index]; let mvp = mul(camera.projection_view_mat, transform.world); - let cull_occlusion = (cull_flags & CullFlags::MeshOcclusion) != 0; + let cull_frustum = (cull_flags & CullFlags::MeshFrustum) != 0; var visible = true; - if (LATE == 0 && !was_visible) { - // During previous frame, if mesh was NOT visible, we don't render it. 
- visible = false; - } - - if (visible && (cull_flags & CullFlags::MeshFrustum)) { - visible = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); - } - - if (LATE == 1 && visible && cull_occlusion) { - if (let screen_aabb = project_aabb(mvp, camera.near_clip, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, true); - - if (visible && true) { - let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; - let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; - var debug_rect = DebugRect(); - debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); - debug_rect.extent = ndc_aabb_max - ndc_aabb_min; - debug_rect.color = f32x3(1.0, 0.0, 1.0); - debug_rect.coord = DebugDrawCoord::NDC; - debug_draw_rect(debug_drawer[0], debug_rect); - } - } - } + visible = visible && cull_frustum && test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); - if (visible && (LATE == 0 || !was_visible)) { - var lod_index = 0; + var lod_index = 0; #if 1 - // Credits: - // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 - let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; - let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; - let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; - let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; - let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); - let aabb_rough_camera_distance = max(length(aabb_center - camera.position) - 0.5 * aabb_rough_extent, 0.0); - - // Avoiding the atan here - let rough_resolution = max(camera.resolution.x, camera.resolution.y); - let fov90_distance_to_screen_ratio = 2.0f; - let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; - let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); - 
let rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; - - for (var i = 1; i < mesh.lod_count; i++) { - let mesh_lod = mesh.lods[i]; - let rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; - if (rough_pixel_error < camera.acceptable_lod_error) { - lod_index = i; - } else { - break; - } - } -#endif - - mesh_instance.lod_index = lod_index; - let mesh_lod = mesh.lods[lod_index]; - var meshlet_instance_offset = 0; - if (LATE == 0) { - meshlet_instance_offset = std::atomic_add(early_visible_meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + // Credits: + // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 + let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; + let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; + let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; + let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; + let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); + let aabb_rough_camera_distance = max(length(aabb_center - camera.position) - 0.5 * aabb_rough_extent, 0.0); + + // Avoiding the atan here + let rough_resolution = max(camera.resolution.x, camera.resolution.y); + let fov90_distance_to_screen_ratio = 2.0f; + let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; + let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); + let rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; + + for (var i = 1; i < mesh.lod_count; i++) { + let mesh_lod = mesh.lods[i]; + let rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; + if (rough_pixel_error < camera.acceptable_lod_error) { + lod_index = i; } else { - meshlet_instance_offset = std::atomic_add(late_visible_meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); - //meshlet_instance_offset += 
max(0, i32(early_visible_meshlet_instances_count[0]) - 1); - } - - for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { - let offset = meshlet_instance_offset + i; - var meshlet_instance = MeshletInstance(); - meshlet_instance.mesh_instance_index = mesh_instance_index; - meshlet_instance.meshlet_index = i; - meshlet_instances[offset] = meshlet_instance; + break; } } +#endif - if (LATE == 1) { - mesh_visibility_mask[mesh_instance_index] = visible ? 1 : 0; + mesh_instance.lod_index = lod_index; + let mesh_lod = mesh.lods[lod_index]; + let meshlet_instance_offset = std::atomic_add(visible_meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { + let offset = meshlet_instance_offset + i; + var meshlet_instance = MeshletInstance(); + meshlet_instance.mesh_instance_index = mesh_instance_index; + meshlet_instance.meshlet_index = i; + meshlet_instances[offset] = meshlet_instance; } } diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index 24bab7fd..c77d1a13 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -6,7 +6,6 @@ import debug_drawer; #include -[[vk::constant_id(0)]] const u32 LATE = 0; [[vk::binding(0)]] ConstantBuffer camera; [[vk::binding(1)]] StructuredBuffer meshlet_instances; [[vk::binding(2)]] StructuredBuffer mesh_instances; @@ -14,12 +13,10 @@ import debug_drawer; [[vk::binding(4)]] StructuredBuffer transforms; [[vk::binding(5)]] Image2D hiz_image; [[vk::binding(6)]] Sampler hiz_sampler; -[[vk::binding(7)]] StructuredBuffer early_visible_meshlet_instances_count; -[[vk::binding(8)]] StructuredBuffer late_visible_meshlet_instances_count; -[[vk::binding(9)]] RWStructuredBuffer meshlet_visibility_mask; -[[vk::binding(10)]] RWStructuredBuffer cull_triangles_cmd; -[[vk::binding(11)]] RWStructuredBuffer 
visible_meshlet_instances_indices; -[[vk::binding(12)]] RWStructuredBuffer debug_drawer; +[[vk::binding(7)]] StructuredBuffer visible_meshlet_instances_count; +[[vk::binding(8)]] RWStructuredBuffer visible_meshlet_instances_indices; +[[vk::binding(9)]] RWStructuredBuffer cull_triangles_cmd; +[[vk::binding(10)]] RWStructuredBuffer debug_drawer; #ifndef CULLING_MESHLET_COUNT #define CULLING_MESHLET_COUNT 64 @@ -29,25 +26,14 @@ import debug_drawer; [[numthreads(CULLING_MESHLET_COUNT, 1, 1)]] func cs_main( uint3 thread_id : SV_DispatchThreadID, - uniform CullFlags cull_flags, + uniform CullFlags cull_flags ) -> void { - let meshlet_instance_count = (LATE == 0) ? early_visible_meshlet_instances_count[0] : late_visible_meshlet_instances_count[0]; - let local_meshlet_instance_index = thread_id.x; - if (local_meshlet_instance_index >= meshlet_instance_count) { + let meshlet_instance_count = visible_meshlet_instances_count[0]; + let meshlet_instance_index = thread_id.x; + if (meshlet_instance_index >= meshlet_instance_count) { return; } - var meshlet_instance_index = local_meshlet_instance_index; - if (LATE == 1) { - //meshlet_instance_index += early_visible_meshlet_instances_count[0]; - } - -#if 0 - let mask_index = meshlet_instance_index / 32; - let bit_index = meshlet_instance_index - (mask_index * 32); - let visibility_bit = 1 << bit_index; - let was_visible = (meshlet_visibility_mask[mask_index] & visibility_bit) != 0; - let meshlet_instance = meshlet_instances[meshlet_instance_index]; let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; let mesh = meshes[mesh_instance.mesh_index]; @@ -55,55 +41,35 @@ func cs_main( let mesh_lod = mesh.lods[mesh_instance.lod_index]; let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; + let cull_frustum = (cull_flags & CullFlags::MeshletFrustum) != 0; let cull_occlusion = (cull_flags & CullFlags::MeshletOcclusion) != 0; - let mvp = mul(camera.projection_view_mat, transform.world); - if (LATE == 0) { 
- var visible = was_visible; - if (visible) { - visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); - } - - if (visible) { - let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); - visible_meshlet_instances_indices[index] = meshlet_instance_index; - } - } else { - var visible = true; - if (visible) { - visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); - } + var visible = true; + if (visible) { + let mvp = mul(camera.projection_view_mat, transform.world); + visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); + } - if (visible && cull_occlusion) { - if (let screen_aabb = project_aabb(mvp, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, false); + if (visible && cull_occlusion) { + let mvp_prev = mul(camera.prev_projection_view_mat, transform.world); + if (let screen_aabb = project_aabb(mvp_prev, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, false); - if (visible && true) { - let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; - let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; - var debug_rect = DebugRect(); - debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); - debug_rect.extent = ndc_aabb_max - ndc_aabb_min; - debug_rect.color = f32x3(1.0, 0.0, 0.0); - debug_rect.coord = DebugDrawCoord::NDC; - debug_draw_rect(debug_drawer[0], debug_rect); - } + if (visible && true) { + let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; + let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; + var debug_rect = DebugRect(); + debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); + debug_rect.extent = ndc_aabb_max - ndc_aabb_min; + debug_rect.color = f32x3(1.0, 0.0, 0.0); + debug_rect.coord = DebugDrawCoord::NDC; + debug_draw_rect(debug_drawer[0], debug_rect); } } + } - if (visible && 
!was_visible) { - let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); - visible_meshlet_instances_indices[index] = meshlet_instance_index; - } - - if (visible) { - std::atomic_or(meshlet_visibility_mask[mask_index], visibility_bit, std::memory_order_relaxed); - } else { - std::atomic_and(meshlet_visibility_mask[mask_index], ~visibility_bit, std::memory_order_relaxed); - } + if (visible) { + let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + visible_meshlet_instances_indices[index] = meshlet_instance_index; } -#else - let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); - visible_meshlet_instances_indices[index] = meshlet_instance_index; -#endif } diff --git a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang index f1b13fe8..bb9814cf 100644 --- a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang +++ b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang @@ -1,17 +1,11 @@ import std; import gpu; -[[vk::constant_id(0)]] const u32 LATE = 0; -[[vk::binding(0)]] StructuredBuffer early_visible_meshlet_instances_count; -[[vk::binding(1)]] StructuredBuffer late_visible_meshlet_instances_count; -[[vk::binding(2)]] RWStructuredBuffer cull_meshlets_cmd; +[[vk::binding(0)]] StructuredBuffer visible_meshlet_instances_count; +[[vk::binding(1)]] RWStructuredBuffer cull_meshlets_cmd; [[shader("compute")]] [[numthreads(1, 1, 1)]] func cs_main() -> void { - if (LATE == 0) { - cull_meshlets_cmd[0].x = (early_visible_meshlet_instances_count[0] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; - } else { - cull_meshlets_cmd[0].x = (late_visible_meshlet_instances_count[0] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; - } + cull_meshlets_cmd[0].x = (visible_meshlet_instances_count[0] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; } diff --git 
a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index f68a65f0..82e57205 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -75,7 +75,7 @@ public struct Camera { public mat4 projection_view_mat; public mat4 inv_view_mat; public mat4 inv_projection_view_mat; - public mat4 frustum_projection_view_mat; + public mat4 prev_projection_view_mat; public f32x3 position; public f32 near_clip; public f32 far_clip; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index d4b2dff5..c323480b 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -329,46 +329,13 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in if (!info.gpu_mesh_instances.empty()) { self.mesh_instances_buffer = self.mesh_instances_buffer.resize(device, info.gpu_mesh_instances.size_bytes()).value(); prepared_frame.mesh_instances_buffer = transfer_man.upload_staging(info.gpu_mesh_instances, self.mesh_instances_buffer); - - auto mesh_instance_visibility_mask_size_bytes = info.mesh_instance_count * sizeof(u32); - if (mesh_instance_visibility_mask_size_bytes > self.mesh_instance_visibility_mask_buffer.data_size()) { - self.mesh_instance_visibility_mask_buffer = - self.mesh_instance_visibility_mask_buffer.resize(device, mesh_instance_visibility_mask_size_bytes).value(); - prepared_frame.mesh_instance_visibility_mask_buffer = - self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eNone); - prepared_frame.mesh_instance_visibility_mask_buffer = zero_fill_pass(std::move(prepared_frame.mesh_instance_visibility_mask_buffer)); - } else { - prepared_frame.mesh_instance_visibility_mask_buffer = - self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eMemoryRead); - } - } else if (self.mesh_instances_buffer) { prepared_frame.mesh_instances_buffer = 
self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eMemoryRead); - prepared_frame.mesh_instance_visibility_mask_buffer = - self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eMemoryRead); } if (info.max_meshlet_instance_count > 0) { prepared_frame.meshlet_instances_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, info.max_meshlet_instance_count * sizeof(GPU::MeshletInstance)); - - auto meshlet_instance_visibility_mask_size_bytes = (info.max_meshlet_instance_count + 31) / 32 * sizeof(u32); - if (meshlet_instance_visibility_mask_size_bytes > self.meshlet_instance_visibility_mask_buffer.data_size()) { - self.meshlet_instance_visibility_mask_buffer = - self.meshlet_instance_visibility_mask_buffer.resize(device, meshlet_instance_visibility_mask_size_bytes).value(); - prepared_frame.meshlet_instance_visibility_mask_buffer = - self.meshlet_instance_visibility_mask_buffer.acquire(device, "meshlet instance visibility mask", vuk::eNone); - prepared_frame.meshlet_instance_visibility_mask_buffer = - zero_fill_pass(std::move(prepared_frame.meshlet_instance_visibility_mask_buffer)); - } else { - prepared_frame.meshlet_instance_visibility_mask_buffer = - self.meshlet_instance_visibility_mask_buffer.acquire(device, "meshlet instance visibility mask", vuk::eMemoryRead); - } - } else { - if (self.meshlet_instance_visibility_mask_buffer) { - device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); - self.meshlet_instance_visibility_mask_buffer = {}; - } } info.environment.transmittance_lut_size = self.sky_transmittance_lut_view.extent(); @@ -440,37 +407,29 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in } static auto cull_meshes( - bool late, GPU::CullFlags cull_flags, u32 mesh_instance_count, TransferManager &transfer_man, - vuk::Value &hiz_attachment, vuk::Value &meshes_buffer, vuk::Value &mesh_instances_buffer, vuk::Value &meshlet_instances_buffer, - 
vuk::Value &early_visible_meshlet_instances_count_buffer, - vuk::Value &late_visible_meshlet_instances_count_buffer, - vuk::Value &mesh_visibility_mask_buffer, + vuk::Value &visible_meshlet_instances_count_buffer, vuk::Value &transforms_buffer, vuk::Value &camera_buffer, vuk::Value &debug_drawer_buffer ) -> vuk::Value { ZoneScoped; - memory::ScopedStack stack; auto vis_cull_meshes_pass = vuk::make_pass( - stack.format("vis cull meshes {}", late ? "late" : "early"), - [mesh_instance_count, cull_flags, late]( + "vis cull meshes", + [mesh_instance_count, cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eComputeRead) camera, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, - VUK_IA(vuk::eComputeSampled) hiz, VUK_BA(vuk::eComputeRW) mesh_instances, VUK_BA(vuk::eComputeRW) meshlet_instances, - VUK_BA(vuk::eComputeRW) mesh_visibility_mask, - VUK_BA(vuk::eComputeRW) early_visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRW) late_visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, VUK_BA(vuk::eComputeRW) debug_drawer ) { cmd_list // @@ -478,28 +437,20 @@ static auto cull_meshes( .bind_buffer(0, 0, camera) .bind_buffer(0, 1, meshes) .bind_buffer(0, 2, transforms) - .bind_image(0, 3, hiz) - .bind_sampler(0, 4, hiz_sampler_info) - .bind_buffer(0, 5, mesh_instances) - .bind_buffer(0, 6, meshlet_instances) - .bind_buffer(0, 7, mesh_visibility_mask) - .bind_buffer(0, 8, early_visible_meshlet_instances_count) - .bind_buffer(0, 9, late_visible_meshlet_instances_count) - .bind_buffer(0, 10, debug_drawer) + .bind_buffer(0, 3, mesh_instances) + .bind_buffer(0, 4, meshlet_instances) + .bind_buffer(0, 5, visible_meshlet_instances_count) + .bind_buffer(0, 6, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) - .specialize_constants(0, late ? 
1 : 0) .dispatch_invocations(mesh_instance_count); return std::make_tuple( camera, meshes, transforms, - hiz, mesh_instances, meshlet_instances, - mesh_visibility_mask, - early_visible_meshlet_instances_count, - late_visible_meshlet_instances_count, + visible_meshlet_instances_count, debug_drawer ); } @@ -509,52 +460,40 @@ static auto cull_meshes( camera_buffer, meshes_buffer, transforms_buffer, - hiz_attachment, mesh_instances_buffer, meshlet_instances_buffer, - mesh_visibility_mask_buffer, - early_visible_meshlet_instances_count_buffer, - late_visible_meshlet_instances_count_buffer, + visible_meshlet_instances_count_buffer, debug_drawer_buffer ) = vis_cull_meshes_pass( std::move(camera_buffer), std::move(meshes_buffer), std::move(transforms_buffer), - std::move(hiz_attachment), std::move(mesh_instances_buffer), std::move(meshlet_instances_buffer), - std::move(mesh_visibility_mask_buffer), - std::move(early_visible_meshlet_instances_count_buffer), - std::move(late_visible_meshlet_instances_count_buffer), + std::move(visible_meshlet_instances_count_buffer), std::move(debug_drawer_buffer) ); auto generate_cull_commands_pass = vuk::make_pass( - stack.format("generate cull commands {}", late ? "late" : "early"), - [late]( - vuk::CommandBuffer &cmd_list, // - VUK_BA(vuk::eComputeRead) early_visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRead) late_visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRW) cull_meshlets_cmd - ) { + "generate cull commands", + [](vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) cull_meshlets_cmd) { cmd_list // .bind_compute_pipeline("passes.generate_cull_commands") - .bind_buffer(0, 0, early_visible_meshlet_instances_count) - .bind_buffer(0, 1, late_visible_meshlet_instances_count) - .bind_buffer(0, 2, cull_meshlets_cmd) - .specialize_constants(0, late ? 
1 : 0) + .bind_buffer(0, 0, visible_meshlet_instances_count) + .bind_buffer(0, 1, cull_meshlets_cmd) .dispatch(1); - return std::make_tuple(early_visible_meshlet_instances_count, late_visible_meshlet_instances_count, cull_meshlets_cmd); + return std::make_tuple(visible_meshlet_instances_count, cull_meshlets_cmd); } ); auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); - std::tie(early_visible_meshlet_instances_count_buffer, late_visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = + std::tie(visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = generate_cull_commands_pass( - std::move(early_visible_meshlet_instances_count_buffer), - std::move(late_visible_meshlet_instances_count_buffer), + std::move(visible_meshlet_instances_count_buffer), std::move(cull_meshlets_cmd_buffer) ); @@ -562,14 +501,11 @@ static auto cull_meshes( } static auto cull_meshlets( - bool late, GPU::CullFlags cull_flags, TransferManager &transfer_man, vuk::Value &hiz_attachment, vuk::Value &cull_meshlets_cmd_buffer, - vuk::Value &early_visible_meshlet_instances_count_buffer, - vuk::Value &late_visible_meshlet_instances_count_buffer, - vuk::Value &meshlet_instance_visibility_mask_buffer, + vuk::Value &visible_meshlet_instances_count_buffer, vuk::Value &visible_meshlet_instances_indices_buffer, vuk::Value &reordered_indices_buffer, vuk::Value &meshes_buffer, @@ -580,12 +516,11 @@ static auto cull_meshlets( vuk::Value &debug_drawer_buffer ) -> vuk::Value { ZoneScoped; - memory::ScopedStack stack; // ── CULL MESHLETS ─────────────────────────────────────────────────── auto vis_cull_meshlets_pass = vuk::make_pass( - stack.format("vis cull meshlets {}", late ? 
"late" : "early"), - [late, cull_flags]( + "vis cull meshlets", + [cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) dispatch_cmd, VUK_BA(vuk::eComputeRead) camera, @@ -594,13 +529,13 @@ static auto cull_meshlets( VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, VUK_IA(vuk::eComputeSampled) hiz, - VUK_BA(vuk::eComputeRead) early_visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRead) late_visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRW) meshlet_instance_visibility_mask, - VUK_BA(vuk::eComputeRW) cull_triangles_cmd, + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, + VUK_BA(vuk::eComputeRW) cull_triangles_cmd, VUK_BA(vuk::eComputeRW) debug_drawer ) { + cmd_list.image_barrier(hiz, vuk::eComputeRW, vuk::eComputeSampled, 0, hiz->level_count); + cmd_list // .bind_compute_pipeline("passes.cull_meshlets") .bind_buffer(0, 0, camera) @@ -610,14 +545,11 @@ static auto cull_meshlets( .bind_buffer(0, 4, transforms) .bind_image(0, 5, hiz) .bind_sampler(0, 6, hiz_sampler_info) - .bind_buffer(0, 7, early_visible_meshlet_instances_count) - .bind_buffer(0, 8, late_visible_meshlet_instances_count) - .bind_buffer(0, 9, meshlet_instance_visibility_mask) - .bind_buffer(0, 10, cull_triangles_cmd) - .bind_buffer(0, 11, visible_meshlet_instances_indices) - .bind_buffer(0, 12, debug_drawer) + .bind_buffer(0, 7, visible_meshlet_instances_count) + .bind_buffer(0, 8, visible_meshlet_instances_indices) + .bind_buffer(0, 9, cull_triangles_cmd) + .bind_buffer(0, 10, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) - .specialize_constants(0, late ? 
1 : 0) .dispatch_indirect(dispatch_cmd); return std::make_tuple( @@ -628,11 +560,9 @@ static auto cull_meshlets( meshes, transforms, hiz, - early_visible_meshlet_instances_count, - late_visible_meshlet_instances_count, - meshlet_instance_visibility_mask, - cull_triangles_cmd, + visible_meshlet_instances_count, visible_meshlet_instances_indices, + cull_triangles_cmd, debug_drawer ); } @@ -648,11 +578,9 @@ static auto cull_meshlets( meshes_buffer, transforms_buffer, hiz_attachment, - early_visible_meshlet_instances_count_buffer, - late_visible_meshlet_instances_count_buffer, - meshlet_instance_visibility_mask_buffer, - cull_triangles_cmd_buffer, + visible_meshlet_instances_count_buffer, visible_meshlet_instances_indices_buffer, + cull_triangles_cmd_buffer, debug_drawer_buffer ) = vis_cull_meshlets_pass( @@ -663,17 +591,15 @@ static auto cull_meshlets( std::move(meshes_buffer), std::move(transforms_buffer), std::move(hiz_attachment), - std::move(early_visible_meshlet_instances_count_buffer), - std::move(late_visible_meshlet_instances_count_buffer), - std::move(meshlet_instance_visibility_mask_buffer), - std::move(cull_triangles_cmd_buffer), + std::move(visible_meshlet_instances_count_buffer), std::move(visible_meshlet_instances_indices_buffer), + std::move(cull_triangles_cmd_buffer), std::move(debug_drawer_buffer) ); // ── CULL TRIANGLES ────────────────────────────────────────────────── auto vis_cull_triangles_pass = vuk::make_pass( - stack.format("vis cull triangles {}", late ? 
"late" : "early"), + "vis cull triangles", [cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, @@ -740,7 +666,6 @@ static auto cull_meshlets( } static auto draw_visbuffer( - bool late, vuk::PersistentDescriptorSet &descriptor_set, vuk::Value &depth_attachment, vuk::Value &visbuffer_attachment, @@ -755,10 +680,9 @@ static auto draw_visbuffer( vuk::Value &camera_buffer ) -> void { ZoneScoped; - memory::ScopedStack stack; auto vis_encode_pass = vuk::make_pass( - stack.format("vis encode {}", late ? "late" : "early"), + "vis encode", [&descriptor_set]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) triangle_indirect, @@ -1231,8 +1155,6 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value void { self.meshes_buffer = {}; } - if (self.mesh_instance_visibility_mask_buffer) { - device.destroy(self.mesh_instance_visibility_mask_buffer.id()); - self.mesh_instance_visibility_mask_buffer = {}; - } - - if (self.meshlet_instance_visibility_mask_buffer) { - device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); - self.meshlet_instance_visibility_mask_buffer = {}; - } - if (self.materials_buffer) { device.destroy(self.materials_buffer.id()); self.materials_buffer = {}; diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index f006aaad..f7e70ef8 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -30,9 +30,7 @@ struct PreparedFrame { vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = {}; vuk::Value mesh_instances_buffer = {}; - vuk::Value mesh_instance_visibility_mask_buffer = {}; vuk::Value meshlet_instances_buffer = {}; - vuk::Value meshlet_instance_visibility_mask_buffer = {}; vuk::Value materials_buffer = {}; vuk::Value environment_buffer = {}; vuk::Value camera_buffer = {}; @@ -57,8 +55,6 @@ struct SceneRenderer { Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; - Buffer mesh_instance_visibility_mask_buffer = 
{}; - Buffer meshlet_instance_visibility_mask_buffer = {}; Buffer materials_buffer = {}; From 2c801f36351a3974be08fb5afb981139403cfcf2 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 26 Aug 2025 16:04:05 +0300 Subject: [PATCH 16/27] fix gpu materials --- Lorr/Engine/Asset/Asset.cc | 1 + Lorr/Engine/Resources/shaders/scene.slang | 2 +- Lorr/Engine/Scene/GPUScene.hh | 2 +- Lorr/Engine/Scene/Scene.cc | 71 +++++++++++++---------- Lorr/Engine/Scene/Scene.hh | 4 +- Lorr/Engine/Scene/SceneRenderer.cc | 1 - 6 files changed, 47 insertions(+), 34 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 81934d49..a8963b58 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -838,6 +838,7 @@ auto AssetManager::load_model(this AssetManager &self, const UUID &uuid) -> bool simplified_indices.resize(result_index_count); } + gpu_mesh.vertex_count = mesh_vertices.size(); gpu_mesh.lod_count += 1; last_lod_indices = simplified_indices; diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 82e57205..fab0ed8d 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -302,7 +302,7 @@ public struct Mesh { public f32x3 *vertex_positions = nullptr; public f32x3 *vertex_normals = nullptr; public f32x2 *texture_coords = nullptr; - public u32 _padding = 0; + public u32 vertex_count = 0; public u32 lod_count = 0; public MeshLOD lods[MESH_MAX_LODS] = {}; public Bounds bounds = {}; diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 72c76b4b..fce026e5 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -209,7 +209,7 @@ struct Mesh { alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; alignas(8) u64 texture_coords = 0; - alignas(4) u32 _padding = 0; + alignas(4) u32 vertex_count = 0; alignas(4) u32 lod_count = 0; alignas(8) 
MeshLOD lods[MAX_LODS] = {}; alignas(4) Bounds bounds = {}; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 99b9db46..603aeabf 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -708,10 +708,19 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< }; auto dirty_material_ids = asset_man.get_dirty_material_ids(); - auto gpu_materials = std::vector(dirty_material_ids.size()); - auto dirty_material_indices = std::vector(dirty_material_ids.size()); - for (const auto &[gpu_material, index, id] : std::views::zip(gpu_materials, dirty_material_indices, dirty_material_ids)) { - const auto *material = asset_man.get_material(id); + auto dirty_material_indices = std::vector(); + for (const auto dirty_id : dirty_material_ids) { + const auto *material = asset_man.get_material(dirty_id); + if (!material) { + continue; + } + + auto dirty_index = SlotMap_decode_id(dirty_id).index; + dirty_material_indices.push_back(dirty_index); + if (dirty_index <= self.gpu_materials.size()) { + self.gpu_materials.resize(dirty_index + 1, {}); + } + auto albedo_image_index = uuid_to_image_index(material->albedo_texture); auto normal_image_index = uuid_to_image_index(material->normal_texture); auto emissive_image_index = uuid_to_image_index(material->emissive_texture); @@ -720,31 +729,33 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< auto sampler_index = 0_u32; auto flags = GPU::MaterialFlag::None; - // if (albedo_image_index.has_value()) { - // auto *texture = asset_man.get_texture(material->albedo_texture); - // sampler_index = texture->sampler.index(); - // flags |= GPU::MaterialFlag::HasAlbedoImage; - // } - // - // flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; - // flags |= emissive_image_index.has_value() ? 
GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; - // flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; - // flags |= occlusion_image_index.has_value() ? GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; - - gpu_material.albedo_color = material->albedo_color; - gpu_material.emissive_color = material->emissive_color; - gpu_material.roughness_factor = material->roughness_factor; - gpu_material.metallic_factor = material->metallic_factor; - gpu_material.alpha_cutoff = material->alpha_cutoff; - gpu_material.flags = flags; - gpu_material.sampler_index = sampler_index; - gpu_material.albedo_image_index = albedo_image_index.value_or(0_u32); - gpu_material.normal_image_index = normal_image_index.value_or(0_u32); - gpu_material.emissive_image_index = emissive_image_index.value_or(0_u32); - gpu_material.metallic_roughness_image_index = metallic_roughness_image_index.value_or(0_u32); - gpu_material.occlusion_image_index = occlusion_image_index.value_or(0_u32); - - index = SlotMap_decode_id(id).index; + if (albedo_image_index.has_value()) { + auto *texture = asset_man.get_texture(material->albedo_texture); + sampler_index = texture->sampler.index(); + flags |= GPU::MaterialFlag::HasAlbedoImage; + } + + flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; + flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; + flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; + flags |= occlusion_image_index.has_value() ? 
GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; + + auto gpu_material = GPU::Material { + .albedo_color = material->albedo_color, + .emissive_color = material->emissive_color, + .roughness_factor = material->roughness_factor, + .metallic_factor = material->metallic_factor, + .alpha_cutoff = material->alpha_cutoff, + .flags = flags, + .sampler_index = sampler_index, + .albedo_image_index = albedo_image_index.value_or(0_u32), + .normal_image_index = normal_image_index.value_or(0_u32), + .emissive_image_index = emissive_image_index.value_or(0_u32), + .metallic_roughness_image_index = metallic_roughness_image_index.value_or(0_u32), + .occlusion_image_index = occlusion_image_index.value_or(0_u32), + }; + + self.gpu_materials[dirty_index] = gpu_material; } auto prepare_info = FramePrepareInfo{ @@ -754,7 +765,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< .dirty_transform_ids = self.dirty_transforms, .gpu_transforms = self.transforms.slots_unsafe(), .dirty_material_indices = dirty_material_indices, - .gpu_materials = gpu_materials, + .gpu_materials = self.gpu_materials, .gpu_meshes = gpu_meshes, .gpu_mesh_instances = gpu_mesh_instances, .environment = environment, diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index 59c9c3b8..e73bb321 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -37,8 +37,10 @@ private: SlotMap transforms = {}; ankerl::unordered_dense::map entity_transforms_map = {}; ankerl::unordered_dense::map, std::vector> rendering_meshes_map = {}; - std::vector dirty_transforms = {}; + + std::vector gpu_materials = {}; + bool models_dirty = false; u32 mesh_instance_count = 0; u32 max_meshlet_instance_count = 0; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index c323480b..9a05d15d 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -5,7 +5,6 @@ #include "Engine/Core/App.hh" #include 
"Engine/Graphics/VulkanDevice.hh" -#include "Engine/Memory/Stack.hh" namespace lr { enum BindlessDescriptorLayout : u32 { From ef72d36534aaaeccc320f799856b4888fcfb2915 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 26 Aug 2025 19:53:19 +0300 Subject: [PATCH 17/27] imgui asset fixes --- Lorr/Editor/EditorModule.cc | 2 +- Lorr/Editor/Window/AssetBrowserWindow.cc | 2 +- Lorr/Engine/Scene/SceneRenderer.cc | 50 ++++++++++++------------ 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/Lorr/Editor/EditorModule.cc b/Lorr/Editor/EditorModule.cc index e3e11ac3..8e6e9f9c 100755 --- a/Lorr/Editor/EditorModule.cc +++ b/Lorr/Editor/EditorModule.cc @@ -372,7 +372,7 @@ static auto draw_welcome_popup(EditorModule &self) -> void { if (ImGui::BeginPopupModal("###welcome", nullptr, popup_flags)) { // ── HEADERS ───────────────────────────────────────────────────────── ImGui::TextUnformatted("placeholder"); - ImGui::InvisibleButton("placeholder", { 0.0f, 75.0f }); + ImGui::InvisibleButton("placeholder", { -FLT_MAX, 75.0f }); // ── SECTIONS ──────────────────────────────────────────────────────── if (ImGui::BeginTabBar("project_guide")) { diff --git a/Lorr/Editor/Window/AssetBrowserWindow.cc b/Lorr/Editor/Window/AssetBrowserWindow.cc index e791e6e3..d2596ec6 100755 --- a/Lorr/Editor/Window/AssetBrowserWindow.cc +++ b/Lorr/Editor/Window/AssetBrowserWindow.cc @@ -242,7 +242,7 @@ static auto draw_dir_contents(AssetBrowserWindow &self) -> void { auto *asset_texture = editor.get_asset_texture(asset); auto asset_image = imgui_renderer.add_image(asset_texture->image_view); ImGui::image_button(file_name, asset_image, button_size); - if (ImGui::BeginDragDropSource()) { + if (ImGui::BeginDragDropSource(ImGuiDragDropFlags_SourceAllowNullID)) { ImGui::SetDragDropPayload("ASSET_BY_UUID", &asset->uuid, sizeof(lr::UUID)); ImGui::EndDragDropSource(); } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 
c323480b..5fa16c93 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -1337,31 +1337,31 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value); - std::tie( - camera_buffer, - meshlet_instances_buffer, - mesh_instances_buffer, - meshes_buffer, - transforms_buffer, - visbuffer_attachment, - albedo_attachment, - normal_attachment, - emissive_attachment, - metallic_roughness_occlusion_attachment - ) = - vis_decode_pass( - std::move(camera_buffer), - std::move(meshlet_instances_buffer), - std::move(mesh_instances_buffer), - std::move(meshes_buffer), - std::move(transforms_buffer), - std::move(materials_buffer), - std::move(visbuffer_attachment), - std::move(albedo_attachment), - std::move(normal_attachment), - std::move(emissive_attachment), - std::move(metallic_roughness_occlusion_attachment) - ); + // std::tie( + // camera_buffer, + // meshlet_instances_buffer, + // mesh_instances_buffer, + // meshes_buffer, + // transforms_buffer, + // visbuffer_attachment, + // albedo_attachment, + // normal_attachment, + // emissive_attachment, + // metallic_roughness_occlusion_attachment + // ) = + // vis_decode_pass( + // std::move(camera_buffer), + // std::move(meshlet_instances_buffer), + // std::move(mesh_instances_buffer), + // std::move(meshes_buffer), + // std::move(transforms_buffer), + // std::move(materials_buffer), + // std::move(visbuffer_attachment), + // std::move(albedo_attachment), + // std::move(normal_attachment), + // std::move(emissive_attachment), + // std::move(metallic_roughness_occlusion_attachment) + // ); // ── BRDF ──────────────────────────────────────────────────────────── auto brdf_pass = vuk::make_pass( From 5256603a54a95ceb19cab236d982fd3d2dcd6d17 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Thu, 28 Aug 2025 10:10:25 +0300 Subject: [PATCH 18/27] remove atomic.slang and gpu.slang --- Lorr/Engine/Graphics/Slang/Compiler.cc | 5 +- 
Lorr/Engine/Resources/shaders/cull.slang | 2 +- .../Resources/shaders/debug_drawer.slang | 6 +- Lorr/Engine/Resources/shaders/gpu/image.slang | 85 +--- .../Resources/shaders/passes/brdf.slang | 17 +- .../Resources/shaders/passes/copy.slang | 2 +- .../shaders/passes/cull_meshes.slang | 2 +- .../shaders/passes/cull_meshlets.slang | 4 +- .../shaders/passes/cull_triangles.slang | 21 +- .../shaders/passes/editor_mousepick.slang | 2 +- .../shaders/passes/histogram_average.slang | 5 +- .../shaders/passes/histogram_generate.slang | 10 +- .../Engine/Resources/shaders/passes/hiz.slang | 22 +- .../Resources/shaders/passes/hiz_slow.slang | 4 +- .../Resources/shaders/passes/imgui.slang | 2 +- .../passes/sky_aerial_perspective.slang | 3 +- .../Resources/shaders/passes/sky_final.slang | 6 +- .../shaders/passes/sky_multiscattering.slang | 13 +- .../Resources/shaders/passes/sky_view.slang | 7 +- .../Resources/shaders/passes/tonemap.slang | 2 +- .../shaders/passes/visbuffer_decode.slang | 4 +- .../shaders/passes/visbuffer_encode.slang | 2 +- Lorr/Engine/Resources/shaders/scene.slang | 12 +- Lorr/Engine/Resources/shaders/sky.slang | 13 +- Lorr/Engine/Resources/shaders/std.slang | 1 - .../Engine/Resources/shaders/std/atomic.slang | 462 ------------------ Lorr/Engine/Scene/GPUScene.hh | 2 +- Lorr/Engine/Scene/SceneRenderer.cc | 25 +- Lorr/Engine/Scene/SceneRenderer.hh | 1 - raddgb.proj | 20 + xmake/packages.lua | 4 +- xmake/repo/packages/s/shader-slang/xmake.lua | 48 ++ 32 files changed, 173 insertions(+), 641 deletions(-) delete mode 100644 Lorr/Engine/Resources/shaders/std/atomic.slang create mode 100644 raddgb.proj create mode 100644 xmake/repo/packages/s/shader-slang/xmake.lua diff --git a/Lorr/Engine/Graphics/Slang/Compiler.cc b/Lorr/Engine/Graphics/Slang/Compiler.cc index c2392ddd..9b2cd286 100644 --- a/Lorr/Engine/Graphics/Slang/Compiler.cc +++ b/Lorr/Engine/Graphics/Slang/Compiler.cc @@ -328,7 +328,7 @@ auto SlangCompiler::new_session(const SlangSessionInfo &info) -> ls::option 
ls::option u32 { - let index = std::atomic_add(draw_data.draw_count, 1, std::memory_order_acq_rel); + let index = __atomic_add(draw_data.draw_count, 1, MemoryOrder::AcquireRelease); if (index < draw_data.capacity) { return index; } @@ -49,7 +49,7 @@ func push_draw(__ref DebugDrawData draw_data) -> u32 { public func debug_draw_aabb(__ref DebugDrawer drawer, in DebugAABB v) -> void { let index = push_draw(drawer.aabb_data); if (index != ~0u) { - std::atomic_add(drawer.aabb_draw_cmd.instance_count, 1, std::memory_order_acq_rel); + __atomic_add(drawer.aabb_draw_cmd.instance_count, 1, MemoryOrder::AcquireRelease); drawer.aabb_buffer[index] = v; } } @@ -57,7 +57,7 @@ public func debug_draw_aabb(__ref DebugDrawer drawer, in DebugAABB v) -> void { public func debug_draw_rect(__ref DebugDrawer drawer, in DebugRect v) -> void { let index = push_draw(drawer.rect_data); if (index != ~0u) { - std::atomic_add(drawer.rect_draw_cmd.instance_count, 1, std::memory_order_acq_rel); + __atomic_add(drawer.rect_draw_cmd.instance_count, 1, MemoryOrder::AcquireRelease); drawer.rect_buffer[index] = v; } } \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/gpu/image.slang b/Lorr/Engine/Resources/shaders/gpu/image.slang index 644af432..f08cd6a8 100644 --- a/Lorr/Engine/Resources/shaders/gpu/image.slang +++ b/Lorr/Engine/Resources/shaders/gpu/image.slang @@ -2,93 +2,20 @@ implementing gpu; import std; -public enum ImageOperand : u32 { - None = 0x0, - Bias = 0x1, - Lod = 0x2, - Grad = 0x4, - ConstOffset = 0x8, - Offset = 0x10, - ConstOffsets = 0x20, - MinLod = 0x80, - - // SPIR-V 1.5 - SPV_KHR_vulkan_memory_model - MakeTexelAvailable = 0x100, // Requires NonPrivateTexel to also be set. - MakeTexelVisible = 0x200, // Requires NonPrivateTexel to also be set. 
- NonPrivateTexel = 0x400, - VolatileTexel = 0x800, -}; - -// Image ──────────────────────────────────────────────────────────── -public typealias Image = _Texture; - -public extension Image { - public func sample(in Sampler sampler, vector tex_coords) -> T { - return this.Sample(sampler, tex_coords); - } - - public func sample_mip(in Sampler sampler, vector tex_coords, f32 mip) -> T { - return this.SampleLevel(sampler, tex_coords, mip); - } - - public func sample_grad( - in Sampler sampler, - vector tex_coords, - vector ddx, - vector ddy - ) -> T { - return this.SampleGrad(sampler, tex_coords, ddx, ddy); - } -}; - // Image1D ────────────────────────────────────────────────────────── -__generic -public typealias Image1D = Image; - -public extension Image1D { -}; +public typealias Image1D = Texture1D; // Image2D ────────────────────────────────────────────────────────── -public typealias Image2D = Image; - -public extension Image2D { - public func load(u32x2 texel, u32 mip = 0) -> T { - let coord = __vectorReshape<2>(texel); - return spirv_asm { - %sampled: __sampledType(T) = OpImageFetch $this $coord Lod $mip; - __truncate $$T result __sampledType(T) %sampled; - }; - } -}; +public typealias Image2D = Texture2D; // Image3D ────────────────────────────────────────────────────────── -public typealias Image3D = Image; - -public extension Image3D { -}; - -// StorageImage ──────────────────────────────────────────────────────────── -public typealias StorageImage = _Texture; -public extension StorageImage { - public func load(vector texel, MemoryScope scope = MemoryScope::Device) -> T { - return spirv_asm { - %sampled:__sampledType(T) = OpImageRead $this $texel NonPrivateTexel|MakeTexelVisible $scope; - __truncate $$T result __sampledType(T) %sampled; - }; - } - - public func store(vector texel, T value, MemoryScope scope = MemoryScope::Device) -> void { - spirv_asm { - OpImageWrite $this $texel __convertTexel(value) NonPrivateTexel|MakeTexelAvailable $scope; - }; - } -}; 
+public typealias Image3D = Texture3D; // StorageImage1D ─────────────────────────────────────────────────── -public typealias StorageImage1D = StorageImage; +public typealias StorageImage1D = RWTexture1D; // StorageImage2D ─────────────────────────────────────────────────── -public typealias StorageImage2D = StorageImage; +public typealias StorageImage2D = RWTexture2D; // StorageImage3D ─────────────────────────────────────────────────── -public typealias StorageImage3D = StorageImage; +public typealias StorageImage3D = RWTexture3D; diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang b/Lorr/Engine/Resources/shaders/passes/brdf.slang index 6015e53d..fcf3204c 100644 --- a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ b/Lorr/Engine/Resources/shaders/passes/brdf.slang @@ -26,21 +26,21 @@ ParameterBlock params; [[shader("fragment")]] func fs_main(VertexOutput input) -> f32x4 { - let pixel_pos = u32x2(input.position.xy); - let depth = params.depth_image.load(pixel_pos); + let pixel_pos = u32x3(u32x2(input.position.xy), 0); + let depth = params.depth_image.Load(pixel_pos); if (depth == 0.0) { discard; } - let albedo_color = params.albedo_image.sample_mip(params.linear_repeat_sampler, input.tex_coord, 0).rgb; + let albedo_color = params.albedo_image.SampleLevel(params.linear_repeat_sampler, input.tex_coord, 0).rgb; - let mapped_smooth_normal = params.normal_image.load(pixel_pos); + let mapped_smooth_normal = params.normal_image.Load(pixel_pos); let mapped_normal = std::oct_to_vec3(mapped_smooth_normal.xy); let smooth_normal = std::oct_to_vec3(mapped_smooth_normal.zw); - let emission = params.emissive_image.load(pixel_pos); + let emission = params.emissive_image.Load(pixel_pos); - let metallic_roughness_occlusion = params.metallic_roughness_occlusion_image.load(pixel_pos); + let metallic_roughness_occlusion = params.metallic_roughness_occlusion_image.Load(pixel_pos); let metallic = metallic_roughness_occlusion.x; let roughness = 
metallic_roughness_occlusion.y; let occlusion = metallic_roughness_occlusion.z; @@ -67,11 +67,11 @@ func fs_main(VertexOutput input) -> f32x4 { params.environment.atmos_atmos_radius, params.environment.atmos_planet_radius, f32x2(eye_altitude, sun_cos_theta)); - f32x3 sun_transmittance = params.sky_transmittance_lut.sample_mip(params.linear_clamp_sampler, transmittance_uv, 0.0).rgb; + f32x3 sun_transmittance = params.sky_transmittance_lut.SampleLevel(params.linear_clamp_sampler, transmittance_uv, 0.0).rgb; sun_illuminance = sun_transmittance * params.environment.sun_intensity; // SKY AMBIENT COLOR ──────────────────────────────────────────────── - AtmosphereIntegrateInfo sky_info = {}; + AtmosphereIntegrateInfo sky_info; sky_info.eye_pos = eye_pos; sky_info.eye_dir = up_vec; sky_info.sun_dir = L; @@ -81,6 +81,7 @@ func fs_main(VertexOutput input) -> f32x4 { sky_info.sampling.max_sample_count = 4; sky_info.transmittance_image = params.sky_transmittance_lut; sky_info.multiscattering_image = params.sky_multiscattering_lut; + sky_info.eval_multiscattering = true; sky_info.eval_mie_phase = false; let sky_result = integrate_single_scattered_luminance(params.environment, params.linear_clamp_sampler, sky_info); diff --git a/Lorr/Engine/Resources/shaders/passes/copy.slang b/Lorr/Engine/Resources/shaders/passes/copy.slang index 4d3ef70e..7868f9e0 100644 --- a/Lorr/Engine/Resources/shaders/passes/copy.slang +++ b/Lorr/Engine/Resources/shaders/passes/copy.slang @@ -15,5 +15,5 @@ func cs_main(u32x3 thread_id : SV_DispatchThreadID) -> void { return; } - dst_image.Store(thread_id.xy, src_image.load(thread_id.xy, 0)); + dst_image.Store(thread_id.xy, src_image.Load(u32x3(thread_id.xy, 0), 0)); } diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index 662a5cc3..13e065df 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -69,7 
+69,7 @@ func cs_main( mesh_instance.lod_index = lod_index; let mesh_lod = mesh.lods[lod_index]; - let meshlet_instance_offset = std::atomic_add(visible_meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + let meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[0], mesh_lod.meshlet_count, MemoryOrder::Relaxed); for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { let offset = meshlet_instance_offset + i; var meshlet_instance = MeshletInstance(); diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index c77d1a13..c0e078d1 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -45,7 +45,7 @@ func cs_main( let cull_occlusion = (cull_flags & CullFlags::MeshletOcclusion) != 0; var visible = true; - if (visible) { + if (visible && cull_frustum) { let mvp = mul(camera.projection_view_mat, transform.world); visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); } @@ -69,7 +69,7 @@ func cs_main( } if (visible) { - let index = std::atomic_add(cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + let index = __atomic_add(cull_triangles_cmd[0].x, 1, MemoryOrder::Relaxed); visible_meshlet_instances_indices[index] = meshlet_instance_index; } } diff --git a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang index e530f4ab..21f5409a 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang @@ -130,7 +130,7 @@ func cs_main( model_view_proj_shared = mul(params.camera.projection_view_mat, transform.world); } - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); var triangle_passed = false; var active_triangle_index = 0; @@ -138,23 +138,28 @@ func cs_main( let indices = 
meshlet.indices(mesh_lod, local_index); let positions = meshlet.positions(mesh, indices); triangle_passed = test_triangle(positions, params.camera.resolution, cull_flags, local_index); + triangle_passed = true; if (triangle_passed) { - active_triangle_index = std::atomic_add(triangles_passed_shared, 1, std::memory_order_relaxed); + active_triangle_index = __atomic_add(triangles_passed_shared, 1, MemoryOrder::Relaxed); } } - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); if (local_index == 0) { - base_index_shared = std::atomic_add(params.draw_cmd[0].index_count, triangles_passed_shared * 3, std::memory_order_relaxed); + base_index_shared = __atomic_add(params.draw_cmd[0].index_count, triangles_passed_shared * 3, MemoryOrder::Relaxed); } - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); if (triangle_passed) { + u32 struct_count, struct_stride; + params.reordered_indices.GetDimensions(struct_count, struct_stride); let index_offset = base_index_shared + active_triangle_index * 3; - params.reordered_indices[index_offset + 0] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); - params.reordered_indices[index_offset + 1] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); - params.reordered_indices[index_offset + 2] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & MESHLET_PRIMITIVE_MASK); + if (index_offset + 2 < struct_count) { + params.reordered_indices[index_offset + 0] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); + params.reordered_indices[index_offset + 1] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); + params.reordered_indices[index_offset + 2] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & 
MESHLET_PRIMITIVE_MASK); + } } } diff --git a/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang b/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang index 3caf42ee..be6ef9e6 100644 --- a/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang +++ b/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang @@ -25,7 +25,7 @@ struct PushConstants { [[shader("compute")]] [[numthreads(1, 1, 1)]] func cs_main() -> void { - const u32 texel = visbuffer_data.load(C.texel); + const u32 texel = visbuffer_data.Load(u32x3(C.texel, 0)); if (texel == ~0u) { *C.dst = ~0u; return; diff --git a/Lorr/Engine/Resources/shaders/passes/histogram_average.slang b/Lorr/Engine/Resources/shaders/passes/histogram_average.slang index ecb13043..5e1475b6 100644 --- a/Lorr/Engine/Resources/shaders/passes/histogram_average.slang +++ b/Lorr/Engine/Resources/shaders/passes/histogram_average.slang @@ -29,15 +29,14 @@ func cs_main( ) -> void { let count_for_this_bin = gid == 0 ? 0.0 : f32(params.histogram_bin_indices[gid]); histogram_shared[gid] = count_for_this_bin * f32(gid); - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); [[unroll]] for (u32 cutoff = (HISTOGRAM_BIN_COUNT >> 1); cutoff > 0; cutoff >>= 1) { if (gid < cutoff) { histogram_shared[gid] += histogram_shared[gid + cutoff]; } - - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); } if (gid == 0) { diff --git a/Lorr/Engine/Resources/shaders/passes/histogram_generate.slang b/Lorr/Engine/Resources/shaders/passes/histogram_generate.slang index e2367161..a5857642 100644 --- a/Lorr/Engine/Resources/shaders/passes/histogram_generate.slang +++ b/Lorr/Engine/Resources/shaders/passes/histogram_generate.slang @@ -35,15 +35,15 @@ func cs_main( uniform i32x3 src_extent ) -> void { histogram_shared[group_index] = 0; - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); if (all(thread_id.xy < src_extent.xy)) { - const f32x3 
color = params.src_image.load(thread_id.xy).rgb; + const f32x3 color = params.src_image.Load(u32x3(thread_id.xy, 0)).rgb; const f32 luminance = std::rec2020_to_xyz(color).y; const u32 bin_index = bin_lum(luminance, params.environment.eye_max_exposure, params.environment.eye_min_exposure); - std::atomic_add(histogram_shared[bin_index], 1, std::memory_order_acq_rel); + __atomic_add(histogram_shared[bin_index], 1, MemoryOrder::AcquireRelease); } - std::control_barrier(std::memory_order_acq_rel); - std::atomic_add(params.histogram_bin_indices[group_index], histogram_shared[group_index], std::memory_order_relaxed); + GroupMemoryBarrierWithGroupSync(); + __atomic_add(params.histogram_bin_indices[group_index], histogram_shared[group_index], MemoryOrder::Relaxed); } diff --git a/Lorr/Engine/Resources/shaders/passes/hiz.slang b/Lorr/Engine/Resources/shaders/passes/hiz.slang index 0d166b06..1a01f2cb 100644 --- a/Lorr/Engine/Resources/shaders/passes/hiz.slang +++ b/Lorr/Engine/Resources/shaders/passes/hiz.slang @@ -20,7 +20,7 @@ typealias HiZMip = StorageImage2D; [[vk::binding(6)]] HiZMip dst_mip_3; [[vk::binding(7)]] HiZMip dst_mip_4; [[vk::binding(8)]] HiZMip dst_mip_5; -[[vk::binding(9)]] HiZMip dst_mip_6; +[[vk::binding(9)]] globallycoherent HiZMip dst_mip_6; [[vk::binding(10)]] HiZMip dst_mip_7; [[vk::binding(11)]] HiZMip dst_mip_8; [[vk::binding(12)]] HiZMip dst_mip_9; @@ -85,7 +85,7 @@ func reduce(f32x4 v) -> f32 { } func store(u32x2 texel, u32 mip, f32 v) -> void { - get_mip_image(mip).store(texel, v, mip == 6 ? 
MemoryScope::QueueFamily : MemoryScope::Device); + get_mip_image(mip).Store(texel, v); } func store_2x2(u32x2 p, u32 mip, f32x4 v) -> void { @@ -97,7 +97,7 @@ func store_2x2(u32x2 p, u32 mip, f32x4 v) -> void { func load(u32x2 texel) -> f32 { f32x2 uv = f32x2(texel) * inv_src_extent + inv_src_extent; - return src_image.sample_mip(sampler, uv, 0); + return src_image.SampleLevel(sampler, uv, 0); } func load_2x2(u32x2 p) -> f32x4 { @@ -121,7 +121,7 @@ func load_mid(u32x2 texel) -> f32 { texel = min(texel, src_extent >> 6); } - return dst_mip_6.load(texel, MemoryScope.QueueFamily); + return dst_mip_6.Load(texel); } func load_mid_2x2(u32x2 p) -> f32x4 { @@ -214,7 +214,7 @@ func cs_main(u32x2 group_id : SV_GroupID) -> void { shared_buffer[local_id >> 4] = d; } - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); if (local_id < 16) { d = reduce_mip_simd(group_id * 4 + p, local_id, 4, shared_buffer[local_id]); @@ -228,15 +228,13 @@ func cs_main(u32x2 group_id : SV_GroupID) -> void { return; } - std::control_barrier(std::memory_order_acq_rel, - MemoryScope::Workgroup, MemoryScope::QueueFamily, - std::MemoryLocation::Image | std::MemoryLocation::Workgroup); - + AllMemoryBarrierWithGroupSync(); + if (local_id == 0) { - is_last = std::atomic_add(spd_global_atomic[0], 1, std::memory_order_acq_rel) == C.work_group_count - 1; + is_last = __atomic_add(spd_global_atomic[0], 1, MemoryOrder::AcquireRelease) == C.work_group_count - 1; } - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); if (!is_last) { return; } @@ -259,7 +257,7 @@ func cs_main(u32x2 group_id : SV_GroupID) -> void { shared_buffer[local_id >> 4] = d; } - std::control_barrier(std::memory_order_acq_rel); + GroupMemoryBarrierWithGroupSync(); if (local_id < 16) { d = reduce_mip_simd(p, local_id, 10, shared_buffer[local_id]); diff --git a/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang b/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang index 
3af00d0f..8b713bd1 100644 --- a/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang +++ b/Lorr/Engine/Resources/shaders/passes/hiz_slow.slang @@ -17,6 +17,6 @@ func cs_main( uniform u32x2 src_image_size, uniform u32 mip_index ) -> void { - let c = params.src_image.sample_mip(params.sampler, (f32x2(thread_id) + 0.5) / f32x2(src_image_size), mip_index).r; - params.dst_mip.store(thread_id.xy, c); + let c = params.src_image.SampleLevel(params.sampler, (f32x2(thread_id) + 0.5) / f32x2(src_image_size), mip_index).r; + params.dst_mip.Store(thread_id.xy, c); } diff --git a/Lorr/Engine/Resources/shaders/passes/imgui.slang b/Lorr/Engine/Resources/shaders/passes/imgui.slang index 0d0eb5e9..d74fe99d 100644 --- a/Lorr/Engine/Resources/shaders/passes/imgui.slang +++ b/Lorr/Engine/Resources/shaders/passes/imgui.slang @@ -40,5 +40,5 @@ func vs_main(VertexInput input) -> VertexOutput { [[shader("fragment")]] func fs_main(VertexOutput input) -> f32x4 { - return texture.sample(sampler, input.tex_coord) * input.color; + return texture.Sample(sampler, input.tex_coord) * input.color; } diff --git a/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang b/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang index e43a1c59..4d77b31e 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang @@ -69,13 +69,14 @@ func cs_main( t_max_max = max(0.0, t_max_max - length_to_atmosphere); } - AtmosphereIntegrateInfo info = {}; + AtmosphereIntegrateInfo info; info.eye_pos = eye_pos; info.eye_dir = world_dir; info.sun_dir = params.environment.sun_direction; info.sun_intensity = params.environment.sun_intensity; info.max_integration_length = t_max_max; info.eval_planet_luminance = false; + info.eval_multiscattering = true; info.sampling.variable_sample_count = false; info.sampling.initial_sample_count = max(1.0, (f32(thread_id.z) + 1.0) * 2.0); diff --git 
a/Lorr/Engine/Resources/shaders/passes/sky_final.slang b/Lorr/Engine/Resources/shaders/passes/sky_final.slang index 0f939813..d0df5b48 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_final.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_final.slang @@ -48,7 +48,7 @@ func fs_main( VertexOutput input, uniform ParameterBlock params ) -> f32x4 { - f32 depth = params.depth_image.sample_mip(params.sampler, input.tex_coord, 0.0); + f32 depth = params.depth_image.SampleLevel(params.sampler, input.tex_coord, 0.0); f32x3 NDC = f32x3(input.tex_coord * 2.0 - 1.0, depth); f32x4 world_pos_h = mul(params.camera.inv_projection_view_mat, f32x4(NDC, 1.0)); f32x3 world_pos = world_pos_h.xyz / world_pos_h.w; @@ -86,7 +86,7 @@ func fs_main( eye_altitude, view_zenith_cos_angle, light_on_plane); - f32x4 result = params.sky_view_lut.sample_mip(params.sampler, uv, 0.0); + f32x4 result = params.sky_view_lut.SampleLevel(params.sampler, uv, 0.0); f32x3 luminance = result.rgb; f32 transmittance = result.a; @@ -95,7 +95,7 @@ func fs_main( params.environment.atmos_atmos_radius, params.environment.atmos_planet_radius, f32x2(eye_altitude, sun_cos_theta)); - f32x3 sun_transmittance = params.sky_transmittance_lut.sample_mip(params.sampler, transmittance_uv, 0.0).rgb; + f32x3 sun_transmittance = params.sky_transmittance_lut.SampleLevel(params.sampler, transmittance_uv, 0.0).rgb; if (!planet_intersection.hasValue) { luminance += draw_sun(eye_dir, params.environment.sun_direction, 1.0) * params.environment.sun_intensity * sun_transmittance; diff --git a/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang b/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang index 1fbe32d3..68e8445b 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang @@ -31,12 +31,13 @@ func cs_main( f32x3 sun_dir = f32x3(0.0, sun_cos_theta, std::safe_sqrt(1.0 - sun_cos_theta * sun_cos_theta)); f32x3 ray_pos = 
f32x3(0.0, altitude, 0.0); - AtmosphereIntegrateInfo info = {}; + AtmosphereIntegrateInfo info; info.eye_pos = ray_pos; info.sun_dir = sun_dir; info.eval_mie_phase = false; info.eval_rayleigh_phase = false; info.eval_planet_luminance = true; + info.eval_multiscattering = false; info.sampling.variable_sample_count = false; info.sampling.initial_sample_count = 32; @@ -44,16 +45,18 @@ func cs_main( f32x3 luminance = 0.0; f32x3 multi_scattering_as_1 = 0.0; + /* for (int i = 0; i < SAMPLE_COUNT; i++) { info.eye_dir = HEMISPHERE_64[i]; - const let result = integrate_single_scattered_luminance(params.environment, params.sampler, info); + let result = integrate_single_scattered_luminance(params.environment, params.sampler, info); multi_scattering_as_1 += result.multiscattering_as_1; luminance += result.luminance; } + */ - const let sphere_solid_angle = 4.0f * PI; - const let isotropic_phase = 1.0f / sphere_solid_angle; - const let inv_sample_count = 1.0 / f32(SAMPLE_COUNT); + let sphere_solid_angle = 4.0f * PI; + let isotropic_phase = 1.0f / sphere_solid_angle; + let inv_sample_count = 1.0 / f32(SAMPLE_COUNT); luminance *= sphere_solid_angle * inv_sample_count; multi_scattering_as_1 *= inv_sample_count; f32x3 scattered_luminance = luminance * isotropic_phase; diff --git a/Lorr/Engine/Resources/shaders/passes/sky_view.slang b/Lorr/Engine/Resources/shaders/passes/sky_view.slang index 107b6d99..50cab40f 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_view.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_view.slang @@ -31,7 +31,7 @@ func cs_main( eye_altitude); if (!move_to_top_atmosphere(eye_pos, eye_dir, params.environment.atmos_atmos_radius)) { - params.sky_view_lut.store(thread_id.xy, 0.0); + params.sky_view_lut.Store(thread_id.xy, 0.0); return; } @@ -39,7 +39,7 @@ func cs_main( let sun_zenith_cos_angle = dot(normalize(params.environment.sun_direction), up_vec); let sun_dir = normalize(f32x3(std::safe_sqrt(1.0 - sun_zenith_cos_angle * sun_zenith_cos_angle), 
sun_zenith_cos_angle, 0.0)); - AtmosphereIntegrateInfo info = {}; + AtmosphereIntegrateInfo info; info.eye_pos = eye_pos; info.eye_dir = eye_dir; info.sun_dir = sun_dir; @@ -52,9 +52,10 @@ func cs_main( info.transmittance_image = params.sky_transmittance_lut; info.multiscattering_image = params.sky_multiscattering_lut; + info.eval_multiscattering = true; let result = integrate_single_scattered_luminance(params.environment, params.sampler, info); let transmittance = dot(result.transmittance, 1.0 / 3.0); - params.sky_view_lut.store(thread_id.xy, f32x4(result.luminance, transmittance)); + params.sky_view_lut.Store(thread_id.xy, f32x4(result.luminance, transmittance)); } diff --git a/Lorr/Engine/Resources/shaders/passes/tonemap.slang b/Lorr/Engine/Resources/shaders/passes/tonemap.slang index 791f9765..13900b34 100644 --- a/Lorr/Engine/Resources/shaders/passes/tonemap.slang +++ b/Lorr/Engine/Resources/shaders/passes/tonemap.slang @@ -552,7 +552,7 @@ struct GT7ToneMapping [[shader("fragment")]] f32x4 fs_main(VertexOutput input) { - f32x3 color = params.input_image.sample_mip(params.sampler, input.tex_coord, 0.0).rgb; + f32x3 color = params.input_image.SampleLevel(params.sampler, input.tex_coord, 0.0).rgb; if (params.environment.flags & EnvironmentFlags::HasEyeAdaptation) { let exposure = params.histogram_luminance.exposure; color = color * (exposure + 1.0); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index 63765fc4..01d563cd 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -89,7 +89,7 @@ func compute_partial_derivatives(in f32x4x3 world_positions, in f32x2 uv, in f32 [[shader("fragment")]] func fs_main(VertexOutput input) -> FragmentOutput { - let texel = params.visbuffer.load(u32x2(input.position.xy)); + let texel = params.visbuffer.Load(u32x3(u32x2(input.position.xy), 0)); if (texel == 
~0u) { discard; } @@ -114,6 +114,8 @@ func fs_main(VertexOutput input) -> FragmentOutput { let tex_coord_grad = deriv.gradient_of(tex_coords); FragmentOutput output = {}; + output.albedo_color.x = tex_coords[0].x; + return output; // ALBEDO ─────────────────────────────────────────────────────────── output.albedo_color = material.sample_albedo_color(tex_coord_grad); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index d4970a31..df1b832e 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -70,7 +70,7 @@ func fs_main(VertexOutput input) -> u32 { } #endif - std::atomic_add(params.overdraw[u32x2(input.position.xy)], 1u, std::memory_order_acq_rel, std::MemoryLocation::Image, MemoryScope::QueueFamily); + InterlockedAdd(params.overdraw[u32x2(input.position.xy)], 1u); let vis = VisBufferData(input.meshlet_instance_index, input.triangle_index); return vis.encode(); diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index fab0ed8d..99970360 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -3,6 +3,8 @@ module scene; import std; import gpu; +#include + public const static f32 CAMERA_SCALE_UNIT = 0.01; public const static f32 INV_CAMERA_SCALE_UNIT = 1.0 / CAMERA_SCALE_UNIT; public const static f32 PLANET_RADIUS_OFFSET = 0.001; @@ -156,7 +158,7 @@ public struct Material { public func sample_albedo_color(in UVGradient grad) -> f32x4 { if (this.flags & MaterialFlag::HasAlbedoImage) { let color = bindless_images[this.albedo_image_index] - .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy); + .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy); return this.albedo_color * color; } @@ -165,13 +167,13 @@ public struct Material { public func 
sample_normal_color(in UVGradient grad) -> f32x3 { return bindless_images[this.normal_image_index] - .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; + .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; } public func sample_emissive_color(in UVGradient grad) -> f32x3 { if (this.flags & MaterialFlag::HasEmissiveImage) { let color = bindless_images[this.emissive_image_index] - .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; + .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; return this.emissive_color * color; } @@ -182,7 +184,7 @@ public struct Material { let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); if (this.flags & MaterialFlag::HasMetallicRoughnessImage) { let color = bindless_images[this.metallic_roughness_image_index] - .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).bg; + .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).bg; return metallic_roughness * color; } @@ -192,7 +194,7 @@ public struct Material { public func sample_occlusion_color(in UVGradient grad) -> f32 { if (this.flags & MaterialFlag::HasOcclusionImage) { return bindless_images[this.occlusion_image_index] - .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).r; + .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).r; } return 1.0; diff --git a/Lorr/Engine/Resources/shaders/sky.slang b/Lorr/Engine/Resources/shaders/sky.slang index e0d156bc..c47a5d76 100644 --- a/Lorr/Engine/Resources/shaders/sky.slang +++ b/Lorr/Engine/Resources/shaders/sky.slang @@ -172,9 +172,10 @@ public struct AtmosphereIntegrateInfo { public constexpr bool eval_mie_phase = true; public constexpr bool eval_rayleigh_phase = true; public constexpr bool eval_planet_luminance = false; + public constexpr bool eval_multiscattering = false; 
public Image2D transmittance_image = {}; - public Optional> multiscattering_image = none; + public Image2D multiscattering_image = {}; }; public func integrate_single_scattered_luminance( @@ -247,13 +248,13 @@ public func integrate_single_scattered_luminance( f32x2 transmittance_uv = transmittance_params_to_lut_uv( environment.atmos_atmos_radius, environment.atmos_planet_radius, f32x2(h, sun_theta)); - f32x3 sun_transmittance = info.transmittance_image.sample_mip(lut_sampler, transmittance_uv, 0.0).rgb; + f32x3 sun_transmittance = info.transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; f32x3 MS = 0.0; - if (info.multiscattering_image.hasValue) { + if (info.eval_multiscattering) { f32x2 multiscatter_uv = multiscattering_params_to_lut_uv( environment.atmos_atmos_radius, environment.atmos_planet_radius, environment.multiscattering_lut_size.xy, altitude, sun_theta); - MS = info.multiscattering_image.value.sample_mip(lut_sampler, multiscatter_uv, 0.0).rgb; + MS = info.multiscattering_image.SampleLevel(lut_sampler, multiscatter_uv, 0.0).rgb; } f32x3 scattering_phase = 0.0; @@ -291,7 +292,7 @@ public func integrate_single_scattered_luminance( f32 NoL = saturate(dot(normalize(info.sun_dir), normalize(up_vec))); f32x2 transmittance_uv = transmittance_params_to_lut_uv(environment.atmos_atmos_radius, environment.atmos_planet_radius, f32x2(h, sun_theta)); - f32x3 sun_transmittance = info.transmittance_image.sample_mip(lut_sampler, transmittance_uv, 0.0).rgb; + f32x3 sun_transmittance = info.transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; result.luminance += info.sun_intensity * (sun_transmittance * result.transmittance * NoL * environment.atmos_terrain_albedo / PI); } @@ -329,7 +330,7 @@ public func sample_aerial_perspective( #if 0 return f32x4(relative_depth, linear_slice, non_linear_w, linear_w); #endif - f32x4 aerial_perspective = aerial_perspective_lut.sample_mip(sampler, f32x3(uv, non_linear_w), 0.0); + f32x4 
aerial_perspective = aerial_perspective_lut.SampleLevel(sampler, f32x3(uv, non_linear_w), 0.0); aerial_perspective.xyz *= weight; aerial_perspective.w = 1.0 - (weight * (1.0 - aerial_perspective.w)); diff --git a/Lorr/Engine/Resources/shaders/std.slang b/Lorr/Engine/Resources/shaders/std.slang index 12242b4d..2a5a083e 100644 --- a/Lorr/Engine/Resources/shaders/std.slang +++ b/Lorr/Engine/Resources/shaders/std.slang @@ -8,7 +8,6 @@ module std; __include std.stdint; // std namespace -__include std.atomic; __include std.color; __include std.encoding; __include std.math; diff --git a/Lorr/Engine/Resources/shaders/std/atomic.slang b/Lorr/Engine/Resources/shaders/std/atomic.slang deleted file mode 100644 index 8be28137..00000000 --- a/Lorr/Engine/Resources/shaders/std/atomic.slang +++ /dev/null @@ -1,462 +0,0 @@ -implementing std; - -public namespace std { -// Can support up to SPIR-V 1.5 -public enum MemoryOrder : u32 { - Relaxed = 0x0, - Acquire = 0x2, - Release = 0x4, - AcqRel = 0x8, - SeqCst = 0x10, -}; -public static constexpr MemoryOrder memory_order_relaxed = MemoryOrder::Relaxed; -public static constexpr MemoryOrder memory_order_acquire = MemoryOrder::Acquire; -public static constexpr MemoryOrder memory_order_release = MemoryOrder::Release; -public static constexpr MemoryOrder memory_order_acq_rel = MemoryOrder::AcqRel; -public static constexpr MemoryOrder memory_order_seq_cst = MemoryOrder::SeqCst; - -public enum MemoryLocation : u32 { - None = 0, - Buffer = 0x40, - Subgroup = 0x80, - Workgroup = 0x100, - Image = 0x800, -}; - -func spirv_type_checks() -> void { - if (__type_equals()) { - spirv_asm { - OpExtension "SPV_EXT_shader_atomic_float_add"; - OpCapability AtomicFloat32MinMaxEXT - }; - } else if (__type_equals()) { - spirv_asm { - OpExtension "SPV_EXT_shader_atomic_float_add"; - OpCapability AtomicFloat16MinMaxEXT - }; - } else if (__type_equals()) { - spirv_asm { - OpExtension "SPV_EXT_shader_atomic_float_add"; - OpCapability AtomicFloat64MinMaxEXT - }; - 
} else if (__type_equals() || - __type_equals()) { - spirv_asm { - OpCapability Int64Atomics - }; - } -} - -[[ForceInline]] -public func atomic_load( - __ref T dst, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicLoad &dst $scope $semantics; - }; -} - -[[ForceInline]] -public func atomic_store( - __ref T dst, - T desired, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> void { - spirv_type_checks(); - const u32 semantics = memory_order | location; - spirv_asm { - OpAtomicStore &dst $scope $semantics $desired; - }; -} - -[[ForceInline]] -public func atomic_increment( - __ref T dst, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicIIncrement &dst $scope $semantics; - }; -} - -[[ForceInline]] -public func atomic_decrement( - __ref T dst, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicIDecrement &dst $scope $semantics; - }; -} - -[[ForceInline]] -public func atomic_add( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - constexpr u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicIAdd 
&dst $scope $semantics $value; - }; -} - -[[ForceInline]] -public func atomic_sub( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - constexpr u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicISub &dst $scope $semantics $value; - }; -} - -[[ForceInline]] -public func atomic_max( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - if (__isUnsignedInt()) { - return spirv_asm { - result:$$T = OpAtomicUMax &dst $scope $semantics $value; - }; - } else if (__isSignedInt()) { - return spirv_asm { - result:$$T = OpAtomicSMax &dst $scope $semantics $value; - }; - } else { - spirv_asm { "" }; - } - - return {}; -} - -[[ForceInline]] -public func atomic_min( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - if (__isUnsignedInt()) { - return spirv_asm { - result:$$T = OpAtomicUMin &dst $scope $semantics $value; - }; - } else if (__isSignedInt()) { - return spirv_asm { - result:$$T = OpAtomicSMin &dst $scope $semantics $value; - }; - } else { - spirv_asm { "" }; - } - - return {}; -} - -[[ForceInline]] -public func atomic_and( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicAnd &dst $scope $semantics $value; - }; 
-} - -[[ForceInline]] -public func atomic_or( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicOr &dst $scope $semantics $value; - }; -} - -[[ForceInline]] -public func atomic_xor( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicXor &dst $scope $semantics $value; - }; -} - -// Floating point atomic extension ────────────────────────────────── -[[ForceInline]] -public func atomic_add( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicFAddEXT &dst $scope $semantics $value; - }; -} - -[[ForceInline]] -public func atomic_max( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = OpAtomicFMaxEXT &dst $scope $semantics $value; - }; -} - -[[ForceInline]] -public func atomic_min( - __ref T dst, - T value, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup -) -> T { - spirv_type_checks(); - const u32 semantics = memory_order | location; - return spirv_asm { - result:$$T = 
OpAtomicFMinEXT &dst $scope $semantics $value; - }; -} - -// Base type for atomics. Can perform very basic operations -// where it's supported without extensions. -public struct atomic { - T value; - - [[ForceInline]] - [[mutating]] - public __init(T v) { - this.store(v, std::memory_order_acq_rel); - } - - [[ForceInline]] - [[mutating]] - public func load( - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_load(this.value, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func store( - T desired, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> void { - atomic_store(this.value, desired, memory_order, location, scope); - } -}; - -// Integer extension ──────────────────────────────────────────────── -public extension atomic { - [[ForceInline]] - [[mutating]] - public func fetch_add( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_add(this.value, arg, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func fetch_sub( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_sub(this.value, arg, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func fetch_max( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_max(this.value, arg, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func fetch_min( - T arg, - 
constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_min(this.value, arg, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func fetch_and( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_and(this.value, arg, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func fetch_or( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_or(this.value, arg, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func fetch_xor( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_xor(this.value, arg, memory_order, location, scope); - } -}; - -// Floating point extension ───────────────────────────────────────── -public extension atomic { - [[ForceInline]] - [[mutating]] - public func fetch_add( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_add(this.value, arg, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func fetch_max( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_max(this.value, arg, memory_order, location, scope); - } - - [[ForceInline]] - [[mutating]] - public func fetch_min( - T arg, - constexpr MemoryOrder memory_order, - constexpr MemoryLocation 
location = MemoryLocation::None, - constexpr MemoryScope scope = MemoryScope::Workgroup - ) -> T { - return atomic_min(this.value, arg, memory_order, location, scope); - } -}; - -// Barriers ───────────────────────────────────────────────────────── - -// Wait for all invocations in the scope restricted tangle to reach -// the current point of execution before executing further instructions. -// -// Execution is the scope defining the scope restricted tangle -// affected by this command. -// -// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpControlBarrier -[[ForceInline]] -public func control_barrier( - constexpr MemoryOrder memory_order, - constexpr MemoryScope scope_execution = MemoryScope::Workgroup, - constexpr MemoryScope scope = MemoryScope::Workgroup, - constexpr MemoryLocation location = MemoryLocation::Workgroup -) -> void { - constexpr u32 semantics = memory_order | location; - spirv_asm { - OpControlBarrier $scope_execution $scope $semantics; - }; -} - -// Good old barrier. 
-[[ForceInline]] -public func memory_barrier( - constexpr MemoryOrder memory_order, - constexpr MemoryScope scope = MemoryScope::Workgroup, - constexpr MemoryLocation location = MemoryLocation::None -) -> void { - constexpr u32 semantics = memory_order | location; - spirv_asm { - OpMemoryBarrier $scope $semantics; - }; -} -} - diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index fce026e5..2ae308b1 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -204,7 +204,7 @@ struct MeshLOD { }; struct Mesh { - constexpr static auto MAX_LODS = 8_sz; + constexpr static auto MAX_LODS = 1_sz; alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 9a05d15d..c2955ade 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -332,11 +332,6 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eMemoryRead); } - if (info.max_meshlet_instance_count > 0) { - prepared_frame.meshlet_instances_buffer = - transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, info.max_meshlet_instance_count * sizeof(GPU::MeshletInstance)); - } - info.environment.transmittance_lut_size = self.sky_transmittance_lut_view.extent(); info.environment.sky_view_lut_size = self.sky_view_lut_extent; info.environment.multiscattering_lut_size = self.sky_multiscatter_lut_view.extent(); @@ -443,15 +438,7 @@ static auto cull_meshes( .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) .dispatch_invocations(mesh_instance_count); - return std::make_tuple( - camera, - meshes, - transforms, - mesh_instances, - meshlet_instances, - visible_meshlet_instances_count, - debug_drawer - ); + return std::make_tuple(camera, meshes, transforms, 
mesh_instances, meshlet_instances, visible_meshlet_instances_count, debug_drawer); } ); @@ -491,10 +478,7 @@ static auto cull_meshes( auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); std::tie(visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = - generate_cull_commands_pass( - std::move(visible_meshlet_instances_count_buffer), - std::move(cull_meshlets_cmd_buffer) - ); + generate_cull_commands_pass(std::move(visible_meshlet_instances_count_buffer), std::move(cull_meshlets_cmd_buffer)); return cull_meshlets_cmd_buffer; } @@ -1152,9 +1136,10 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = {}; vuk::Value mesh_instances_buffer = {}; - vuk::Value meshlet_instances_buffer = {}; vuk::Value materials_buffer = {}; vuk::Value environment_buffer = {}; vuk::Value camera_buffer = {}; diff --git a/raddgb.proj b/raddgb.proj new file mode 100644 index 00000000..18557f1f --- /dev/null +++ b/raddgb.proj @@ -0,0 +1,20 @@ +// raddbg 0.9.21 project file + +recent_file: path: "../../AppData/Local/.xmake/packages/v/vuk/2025.07.09/9522f98b6e404372ad5e00af87c4b4f9/include/vuk/Exception.hpp" +recent_file: path: "Lorr/Engine/Scene/SceneRenderer.cc" +recent_file: path: "Lorr/Engine/Graphics/Slang/Compiler.cc" +recent_file: path: "Lorr/Engine/Graphics/Vulkan/Pipeline.cc" +recent_file: path: "../../AppData/Local/.xmake/cache/packages/2508/v/vuk/2025.07.09/source/vuk/src/runtime/vk/DeviceVkResource.cpp" +recent_file: path: "../../../../SDKBuild/build-X64-1.4.321.1/Vulkan-ValidationLayers/layers/chassis/chassis_manual.cpp" +recent_file: path: "../../../../SDKBuild/build-X64-1.4.321.1/Vulkan-ValidationLayers/layers/chassis/dispatch_object_manual.cpp" +recent_file: path: "../../AppData/Local/.xmake/packages/v/vuk/2025.07.09/9522f98b6e404372ad5e00af87c4b4f9/include/vuk/Value.hpp" +recent_file: path: "Lorr/Engine/Graphics/VulkanDevice.hh" +recent_file: path: 
"Lorr/Engine/Graphics/Vulkan/TransferManager.cc" +recent_file: path: "../../AppData/Local/.xmake/packages/v/vuk/2025.07.09/9522f98b6e404372ad5e00af87c4b4f9/include/vuk/RenderGraph.hpp" +recent_file: path: "../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/include/expected" +target: +{ + executable: "build/windows/x64/debug/Editor.exe" + working_directory: "build/windows/x64/debug" + enabled: 1 +} diff --git a/xmake/packages.lua b/xmake/packages.lua index 1e0acce2..b75dcea1 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -1,3 +1,5 @@ +add_repositories("local repo", {rootdir = os.scriptdir()}) + local fmt_version = "11.2.0" local fmt_configs = { header_only = false, shared = false } add_requires("fmt " .. fmt_version, { configs = fmt_configs, system = false }) @@ -53,7 +55,7 @@ add_requires("flecs v4.0.4") add_requires("libsdl3") -add_requires("shader-slang v2025.12.1") +add_requires("shader-slang v2025.15") add_requires("vuk 2025.07.09", { configs = { debug_allocations = false, disable_exceptions = false, diff --git a/xmake/repo/packages/s/shader-slang/xmake.lua b/xmake/repo/packages/s/shader-slang/xmake.lua new file mode 100644 index 00000000..6bc62ca3 --- /dev/null +++ b/xmake/repo/packages/s/shader-slang/xmake.lua @@ -0,0 +1,48 @@ +package("shader-slang") + set_homepage("https://github.com/shader-slang/slang") + set_description("Making it easier to work with shaders") + set_license("MIT") + + if is_host("windows") then + add_urls("https://github.com/shader-slang/slang/releases/download/v$(version)/slang-$(version)-windows-x86_64.tar.gz", + {version = function (version) return version:gsub("v", "") end}) + + add_versions("v2025.10.4", "f4199d9cb32f93410444713adfe880da2b665a9e13f2f8e23fdbff06068a9ff3") + add_versions("v2025.12.1", "02018cc923a46c434e23b166ef13c14165b0a0c4b863279731c4f6c4898fbf8e") + add_versions("v2025.15", "f37e7215e51bee4e8f5ec7b84a5d783deb6cbd0bd033c026b94f2d5a31e88d28") + elseif 
is_host("linux") then + add_urls("https://github.com/shader-slang/slang/releases/download/v$(version)/slang-$(version)-linux-x86_64.tar.gz", + {version = function (version) return version:gsub("v", "") end}) + + add_versions("v2025.10.4", "c2edcfdada38feb345725613c516a842700437f6fa55910b567b9058c415ce8f") + add_versions("v2025.12.1", "8f34b98391562ce6f97d899e934645e2c4466a02e66b69f69651ff1468553b27") + end + + on_install("windows|x64", "linux|x86_64", function (package) + os.cp("include/*.h", package:installdir("include")) + + os.trycp("lib/*slang.*", package:installdir("lib")) + os.trycp("bin/*slang.*", package:installdir("lib")) + + os.trycp("lib/*slang-glslang.*", package:installdir("lib")) + os.trycp("bin/*slang-glslang.*", package:installdir("lib")) + + os.trycp("lib/*slang-glsl-module.*", package:installdir("lib")) + os.trycp("bin/*slang-glsl-module.*", package:installdir("lib")) + + package:addenv("PATH", "bin") + end) + + on_test(function (package) + assert(package:check_cxxsnippets({ test = [[ + #include + #include + + void test() { + Slang::ComPtr global_session; + slang::createGlobalSession(global_session.writeRef()); + } + ]] }, {configs = {languages = "c++17"}})) + end) + +package_end() \ No newline at end of file From 8f690763435e22495b4402aa24a8a065d3ecd18c Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Thu, 28 Aug 2025 17:44:43 +0300 Subject: [PATCH 19/27] bounds check during vis decode --- Lorr/Engine/Asset/Asset.cc | 15 ++++-- Lorr/Engine/Graphics/Slang/Compiler.cc | 4 +- .../Engine/Graphics/Vulkan/TransferManager.cc | 7 ++- Lorr/Engine/Resources/shaders/assert.slang | 2 +- .../Resources/shaders/passes/brdf.slang | 7 ++- .../passes/sky_aerial_perspective.slang | 7 +-- .../shaders/passes/sky_multiscattering.slang | 8 +-- .../Resources/shaders/passes/sky_view.slang | 8 +-- .../shaders/passes/visbuffer_decode.slang | 10 ++-- Lorr/Engine/Resources/shaders/sky.slang | 13 +++-- Lorr/Engine/Scene/SceneRenderer.cc | 
52 +++++++++---------- raddgb.proj | 20 ------- xmake/packages.lua | 2 +- 13 files changed, 67 insertions(+), 88 deletions(-) delete mode 100644 raddgb.proj diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index a8963b58..7b62e72a 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -799,7 +799,6 @@ auto AssetManager::load_model(this AssetManager &self, const UUID &uuid) -> bool ZoneNamedN(z, "GPU Meshlet Generation", true); auto &cur_lod = gpu_mesh.lods[lod_index]; - auto simplified_indices = std::vector(); if (lod_index == 0) { simplified_indices = std::vector(mesh_indices.begin(), mesh_indices.end()); @@ -825,7 +824,7 @@ auto AssetManager::load_model(this AssetManager &self, const UUID &uuid) -> bool nullptr, lod_index_count, TARGET_ERROR, - meshopt_SimplifyLockBorder | meshopt_SimplifyPermissive, + meshopt_SimplifyLockBorder, &result_error ); @@ -880,8 +879,16 @@ auto AssetManager::load_model(this AssetManager &self, const UUID &uuid) -> bool auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); auto meshlet_bb_max = glm::vec3(std::numeric_limits::lowest()); for (u32 i = 0; i < raw_meshlet.triangle_count * 3; i++) { - const auto &tri_pos = mesh_vertices - [indirect_vertex_indices[raw_meshlet.vertex_offset + local_triangle_indices[raw_meshlet.triangle_offset + i]]]; + auto local_triangle_index_offset = raw_meshlet.triangle_offset + i; + LS_EXPECT(local_triangle_index_offset < local_triangle_indices.size()); + auto local_triangle_index = local_triangle_indices[local_triangle_index_offset]; + LS_EXPECT(local_triangle_index < raw_meshlet.vertex_count); + auto indirect_vertex_index_offset = raw_meshlet.vertex_offset + local_triangle_index; + LS_EXPECT(indirect_vertex_index_offset < indirect_vertex_indices.size()); + auto indirect_vertex_index = indirect_vertex_indices[indirect_vertex_index_offset]; + LS_EXPECT(indirect_vertex_index < vertex_count); + + const auto &tri_pos = mesh_vertices[indirect_vertex_index]; 
meshlet_bb_min = glm::min(meshlet_bb_min, tri_pos); meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); } diff --git a/Lorr/Engine/Graphics/Slang/Compiler.cc b/Lorr/Engine/Graphics/Slang/Compiler.cc index 9b2cd286..fce9b645 100644 --- a/Lorr/Engine/Graphics/Slang/Compiler.cc +++ b/Lorr/Engine/Graphics/Slang/Compiler.cc @@ -328,7 +328,7 @@ auto SlangCompiler::new_session(const SlangSessionInfo &info) -> ls::option ls::optionnon_coherent_atom_size() }; +#if 1 + auto buffer = vuk::Buffer{}; self.device->allocator->allocate_buffers(std::span{ &buffer, 1 }, std::span{ &buffer_info, 1 }, LOC); - return buffer; +#else + return **vuk::allocate_buffer(*self.device->allocator, buffer_info); +#endif } auto TransferManager::alloc_transient_buffer(this TransferManager &self, vuk::MemoryUsage usage, usize size, vuk::source_location LOC) diff --git a/Lorr/Engine/Resources/shaders/assert.slang b/Lorr/Engine/Resources/shaders/assert.slang index d1c6307c..653daadb 100644 --- a/Lorr/Engine/Resources/shaders/assert.slang +++ b/Lorr/Engine/Resources/shaders/assert.slang @@ -1,6 +1,6 @@ #ifdef ENABLE_ASSERTIONS #define assert_msg(x, msg, ...) do { if (!bool(x)) { printf(msg, __VA_ARGS__); } } while(false) -#define assert(x) assert_msg(x, "Shader aborted at " __FILE__ ":%d", __LINE__) +#define assert(x) assert_msg(x, "Shader aborted at " __FILE__ ":%d\n", __LINE__) #else #define assert_msg(...) #define assert(...) 
diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang b/Lorr/Engine/Resources/shaders/passes/brdf.slang index fcf3204c..34584812 100644 --- a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ b/Lorr/Engine/Resources/shaders/passes/brdf.slang @@ -71,7 +71,7 @@ func fs_main(VertexOutput input) -> f32x4 { sun_illuminance = sun_transmittance * params.environment.sun_intensity; // SKY AMBIENT COLOR ──────────────────────────────────────────────── - AtmosphereIntegrateInfo sky_info; + AtmosphereIntegrateInfo sky_info = {}; sky_info.eye_pos = eye_pos; sky_info.eye_dir = up_vec; sky_info.sun_dir = L; @@ -79,11 +79,10 @@ func fs_main(VertexOutput input) -> f32x4 { sky_info.sampling.variable_sample_count = true; sky_info.sampling.min_sample_count = 1; sky_info.sampling.max_sample_count = 4; - sky_info.transmittance_image = params.sky_transmittance_lut; - sky_info.multiscattering_image = params.sky_multiscattering_lut; sky_info.eval_multiscattering = true; sky_info.eval_mie_phase = false; - let sky_result = integrate_single_scattered_luminance(params.environment, params.linear_clamp_sampler, sky_info); + let sky_result = integrate_single_scattered_luminance( + sky_info, params.environment, params.linear_clamp_sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); var eye_gradient = dot(N, up_vec); eye_gradient = (eye_gradient + 1.0) * 0.375 + 0.25; diff --git a/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang b/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang index 4d77b31e..a6f4f080 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang @@ -69,7 +69,7 @@ func cs_main( t_max_max = max(0.0, t_max_max - length_to_atmosphere); } - AtmosphereIntegrateInfo info; + AtmosphereIntegrateInfo info = {}; info.eye_pos = eye_pos; info.eye_dir = world_dir; info.sun_dir = params.environment.sun_direction; @@ -81,10 +81,7 @@ func cs_main( 
info.sampling.variable_sample_count = false; info.sampling.initial_sample_count = max(1.0, (f32(thread_id.z) + 1.0) * 2.0); - info.transmittance_image = params.sky_transmittance_lut; - info.multiscattering_image = params.sky_multiscattering_lut; - - let result = integrate_single_scattered_luminance(params.environment, params.sampler, info); + let result = integrate_single_scattered_luminance(info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); let transmittance = dot(result.transmittance, f32x3(1.0f / 3.0f)); params.sky_aerial_perspective_lut.Store(thread_id, f32x4(result.luminance, transmittance)); } diff --git a/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang b/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang index 68e8445b..0c227c51 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang @@ -31,7 +31,7 @@ func cs_main( f32x3 sun_dir = f32x3(0.0, sun_cos_theta, std::safe_sqrt(1.0 - sun_cos_theta * sun_cos_theta)); f32x3 ray_pos = f32x3(0.0, altitude, 0.0); - AtmosphereIntegrateInfo info; + AtmosphereIntegrateInfo info = {}; info.eye_pos = ray_pos; info.sun_dir = sun_dir; info.eval_mie_phase = false; @@ -41,18 +41,14 @@ func cs_main( info.sampling.variable_sample_count = false; info.sampling.initial_sample_count = 32; - info.transmittance_image = params.sky_transmittance_lut; - f32x3 luminance = 0.0; f32x3 multi_scattering_as_1 = 0.0; - /* for (int i = 0; i < SAMPLE_COUNT; i++) { info.eye_dir = HEMISPHERE_64[i]; - let result = integrate_single_scattered_luminance(params.environment, params.sampler, info); + let result = integrate_single_scattered_luminance(info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_transmittance_lut); multi_scattering_as_1 += result.multiscattering_as_1; luminance += result.luminance; } - */ let sphere_solid_angle = 4.0f * PI; let 
isotropic_phase = 1.0f / sphere_solid_angle; diff --git a/Lorr/Engine/Resources/shaders/passes/sky_view.slang b/Lorr/Engine/Resources/shaders/passes/sky_view.slang index 50cab40f..87c0b097 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_view.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_view.slang @@ -39,7 +39,7 @@ func cs_main( let sun_zenith_cos_angle = dot(normalize(params.environment.sun_direction), up_vec); let sun_dir = normalize(f32x3(std::safe_sqrt(1.0 - sun_zenith_cos_angle * sun_zenith_cos_angle), sun_zenith_cos_angle, 0.0)); - AtmosphereIntegrateInfo info; + AtmosphereIntegrateInfo info = {}; info.eye_pos = eye_pos; info.eye_dir = eye_dir; info.sun_dir = sun_dir; @@ -49,12 +49,8 @@ func cs_main( info.sampling.variable_sample_count = true; info.sampling.min_sample_count = sample_count; info.sampling.max_sample_count = sample_count; - - info.transmittance_image = params.sky_transmittance_lut; - info.multiscattering_image = params.sky_multiscattering_lut; info.eval_multiscattering = true; - - let result = integrate_single_scattered_luminance(params.environment, params.sampler, info); + let result = integrate_single_scattered_luminance(info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); let transmittance = dot(result.transmittance, 1.0 / 3.0); params.sky_view_lut.Store(thread_id.xy, f32x4(result.luminance, transmittance)); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index 01d563cd..e1453bda 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -94,6 +94,7 @@ func fs_main(VertexOutput input) -> FragmentOutput { discard; } + FragmentOutput output = {}; let vis = VisBufferData(texel); let meshlet_instance_index = vis.meshlet_instance_index; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; @@ 
-105,6 +106,11 @@ func fs_main(VertexOutput input) -> FragmentOutput { let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; let indices = meshlet.indices(mesh_lod, vis.triangle_index); + if (any(indices > (mesh.vertex_count - 1))) { + // we are somehow OOB'ing + return output; + } + let positions = meshlet.positions(mesh, indices); let normals = meshlet.normals(mesh, indices); let tex_coords = meshlet.tex_coords(mesh, indices); @@ -113,10 +119,6 @@ func fs_main(VertexOutput input) -> FragmentOutput { let deriv = compute_partial_derivatives(world_positions, NDC.xy, params.camera.resolution); let tex_coord_grad = deriv.gradient_of(tex_coords); - FragmentOutput output = {}; - output.albedo_color.x = tex_coords[0].x; - return output; - // ALBEDO ─────────────────────────────────────────────────────────── output.albedo_color = material.sample_albedo_color(tex_coord_grad); diff --git a/Lorr/Engine/Resources/shaders/sky.slang b/Lorr/Engine/Resources/shaders/sky.slang index c47a5d76..a41a58ff 100644 --- a/Lorr/Engine/Resources/shaders/sky.slang +++ b/Lorr/Engine/Resources/shaders/sky.slang @@ -173,15 +173,14 @@ public struct AtmosphereIntegrateInfo { public constexpr bool eval_rayleigh_phase = true; public constexpr bool eval_planet_luminance = false; public constexpr bool eval_multiscattering = false; - - public Image2D transmittance_image = {}; - public Image2D multiscattering_image = {}; }; public func integrate_single_scattered_luminance( + in AtmosphereIntegrateInfo info, in Environment environment, in Sampler lut_sampler, - in AtmosphereIntegrateInfo info + in Image2D transmittance_image = {}, + in Image2D multiscattering_image = {} ) -> AtmosphereLuminance { AtmosphereLuminance result = {}; @@ -248,13 +247,13 @@ public func integrate_single_scattered_luminance( f32x2 transmittance_uv = transmittance_params_to_lut_uv( environment.atmos_atmos_radius, environment.atmos_planet_radius, f32x2(h, sun_theta)); - f32x3 sun_transmittance = 
info.transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; + f32x3 sun_transmittance = transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; f32x3 MS = 0.0; if (info.eval_multiscattering) { f32x2 multiscatter_uv = multiscattering_params_to_lut_uv( environment.atmos_atmos_radius, environment.atmos_planet_radius, environment.multiscattering_lut_size.xy, altitude, sun_theta); - MS = info.multiscattering_image.SampleLevel(lut_sampler, multiscatter_uv, 0.0).rgb; + MS = multiscattering_image.SampleLevel(lut_sampler, multiscatter_uv, 0.0).rgb; } f32x3 scattering_phase = 0.0; @@ -292,7 +291,7 @@ public func integrate_single_scattered_luminance( f32 NoL = saturate(dot(normalize(info.sun_dir), normalize(up_vec))); f32x2 transmittance_uv = transmittance_params_to_lut_uv(environment.atmos_atmos_radius, environment.atmos_planet_radius, f32x2(h, sun_theta)); - f32x3 sun_transmittance = info.transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; + f32x3 sun_transmittance = transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; result.luminance += info.sun_intensity * (sun_transmittance * result.transmittance * NoL * environment.atmos_terrain_albedo / PI); } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 7a7bfe2b..444f4e99 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -1260,7 +1260,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value); - // std::tie( - // camera_buffer, - // meshlet_instances_buffer, - // mesh_instances_buffer, - // meshes_buffer, - // transforms_buffer, - // visbuffer_attachment, - // albedo_attachment, - // normal_attachment, - // emissive_attachment, - // metallic_roughness_occlusion_attachment - // ) = - // vis_decode_pass( - // std::move(camera_buffer), - // std::move(meshlet_instances_buffer), - // std::move(mesh_instances_buffer), - // std::move(meshes_buffer), - // 
std::move(transforms_buffer), - // std::move(materials_buffer), - // std::move(visbuffer_attachment), - // std::move(albedo_attachment), - // std::move(normal_attachment), - // std::move(emissive_attachment), - // std::move(metallic_roughness_occlusion_attachment) - // ); + std::tie( + camera_buffer, + meshlet_instances_buffer, + mesh_instances_buffer, + meshes_buffer, + transforms_buffer, + visbuffer_attachment, + albedo_attachment, + normal_attachment, + emissive_attachment, + metallic_roughness_occlusion_attachment + ) = + vis_decode_pass( + std::move(camera_buffer), + std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), + std::move(meshes_buffer), + std::move(transforms_buffer), + std::move(materials_buffer), + std::move(visbuffer_attachment), + std::move(albedo_attachment), + std::move(normal_attachment), + std::move(emissive_attachment), + std::move(metallic_roughness_occlusion_attachment) + ); // ── BRDF ──────────────────────────────────────────────────────────── auto brdf_pass = vuk::make_pass( diff --git a/raddgb.proj b/raddgb.proj deleted file mode 100644 index 18557f1f..00000000 --- a/raddgb.proj +++ /dev/null @@ -1,20 +0,0 @@ -// raddbg 0.9.21 project file - -recent_file: path: "../../AppData/Local/.xmake/packages/v/vuk/2025.07.09/9522f98b6e404372ad5e00af87c4b4f9/include/vuk/Exception.hpp" -recent_file: path: "Lorr/Engine/Scene/SceneRenderer.cc" -recent_file: path: "Lorr/Engine/Graphics/Slang/Compiler.cc" -recent_file: path: "Lorr/Engine/Graphics/Vulkan/Pipeline.cc" -recent_file: path: "../../AppData/Local/.xmake/cache/packages/2508/v/vuk/2025.07.09/source/vuk/src/runtime/vk/DeviceVkResource.cpp" -recent_file: path: "../../../../SDKBuild/build-X64-1.4.321.1/Vulkan-ValidationLayers/layers/chassis/chassis_manual.cpp" -recent_file: path: "../../../../SDKBuild/build-X64-1.4.321.1/Vulkan-ValidationLayers/layers/chassis/dispatch_object_manual.cpp" -recent_file: path: 
"../../AppData/Local/.xmake/packages/v/vuk/2025.07.09/9522f98b6e404372ad5e00af87c4b4f9/include/vuk/Value.hpp" -recent_file: path: "Lorr/Engine/Graphics/VulkanDevice.hh" -recent_file: path: "Lorr/Engine/Graphics/Vulkan/TransferManager.cc" -recent_file: path: "../../AppData/Local/.xmake/packages/v/vuk/2025.07.09/9522f98b6e404372ad5e00af87c4b4f9/include/vuk/RenderGraph.hpp" -recent_file: path: "../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/include/expected" -target: -{ - executable: "build/windows/x64/debug/Editor.exe" - working_directory: "build/windows/x64/debug" - enabled: 1 -} diff --git a/xmake/packages.lua b/xmake/packages.lua index b75dcea1..a73b9827 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -61,7 +61,7 @@ add_requires("vuk 2025.07.09", { configs = { disable_exceptions = false, }, debug = is_mode("debug") }) -add_requires("meshoptimizer v0.25") +add_requires("meshoptimizer v0.24") add_requires("ktx v4.4.0", { debug = true }) add_requires("svector v1.0.3") From 1ee29e0a7350de889863e68c1f5d276bfe5a02c2 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Thu, 28 Aug 2025 17:58:45 +0300 Subject: [PATCH 20/27] add slang for linux --- xmake/repo/packages/s/shader-slang/xmake.lua | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xmake/repo/packages/s/shader-slang/xmake.lua b/xmake/repo/packages/s/shader-slang/xmake.lua index 6bc62ca3..c35d2391 100644 --- a/xmake/repo/packages/s/shader-slang/xmake.lua +++ b/xmake/repo/packages/s/shader-slang/xmake.lua @@ -16,6 +16,7 @@ package("shader-slang") add_versions("v2025.10.4", "c2edcfdada38feb345725613c516a842700437f6fa55910b567b9058c415ce8f") add_versions("v2025.12.1", "8f34b98391562ce6f97d899e934645e2c4466a02e66b69f69651ff1468553b27") + add_versions("v2025.15", "1eaa24f1f0483f8b8cc4b95153c815394d2f6cae08dbaf8b18d6b7975b8bbe03") end on_install("windows|x64", "linux|x86_64", function (package) @@ -45,4 +46,4 @@ 
package("shader-slang") ]] }, {configs = {languages = "c++17"}})) end) -package_end() \ No newline at end of file +package_end() From 52c3bab5eedae7a10f5a1ea3c3093c96e0531e0a Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 29 Aug 2025 14:58:32 +0300 Subject: [PATCH 21/27] refactor transfer manager --- Lorr/Engine/Asset/Asset.cc | 8 +- Lorr/Engine/Graphics/ImGuiRenderer.cc | 4 +- Lorr/Engine/Graphics/Vulkan/Device.cc | 17 --- .../Engine/Graphics/Vulkan/TransferManager.cc | 121 +++++++++--------- Lorr/Engine/Graphics/VulkanDevice.hh | 36 ++---- Lorr/Engine/Scene/SceneRenderer.cc | 14 +- Lorr/ls/defer.hh | 2 +- 7 files changed, 82 insertions(+), 120 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 7b62e72a..b2191a9a 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -984,7 +984,7 @@ auto AssetManager::load_model(this AssetManager &self, const UUID &uuid) -> bool auto gpu_mesh_buffer_handle = device.buffer(gpu_mesh_buffer.id()); auto gpu_mesh_subrange = vuk::discard_buf("mesh", gpu_mesh_buffer_handle->subrange(0, mesh_upload_size)); - gpu_mesh_subrange = transfer_man.upload_staging(std::move(cpu_mesh_buffer), std::move(gpu_mesh_subrange)); + gpu_mesh_subrange = transfer_man.upload(std::move(cpu_mesh_buffer), std::move(gpu_mesh_subrange)); transfer_man.wait_on(std::move(gpu_mesh_subrange)); for (auto lod_index = 0_sz; lod_index < gpu_mesh.lod_count; lod_index++) { @@ -998,7 +998,7 @@ auto AssetManager::load_model(this AssetManager &self, const UUID &uuid) -> bool lod.indirect_vertex_indices += gpu_mesh_bda + mesh_upload_offset; auto gpu_lod_subrange = vuk::discard_buf("mesh lod subrange", gpu_mesh_buffer_handle->subrange(mesh_upload_offset, lod_upload_size)); - gpu_lod_subrange = transfer_man.upload_staging(std::move(lod_cpu_buffer), std::move(gpu_lod_subrange)); + gpu_lod_subrange = transfer_man.upload(std::move(lod_cpu_buffer), std::move(gpu_lod_subrange)); 
transfer_man.wait_on(std::move(gpu_lod_subrange)); mesh_upload_offset += lod_upload_size; @@ -1155,7 +1155,7 @@ auto AssetManager::load_texture(this AssetManager &self, const UUID &uuid, const } auto image_data = std::move(parsed_image->data); - auto buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, ls::size_bytes(image_data)); + auto buffer = transfer_man.alloc_image_buffer(format, extent); std::memcpy(buffer->mapped_ptr, image_data.data(), image_data.size()); dst_attachment = vuk::copy(std::move(buffer), std::move(dst_attachment)); @@ -1181,7 +1181,7 @@ auto AssetManager::load_texture(this AssetManager &self, const UUID &uuid, const .depth = 1, }; auto size = vuk::compute_image_size(format, level_extent); - auto buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, size); + auto buffer = transfer_man.alloc_image_buffer(format, level_extent); // TODO, WARN: size param might not be safe. Check with asan. std::memcpy(buffer->mapped_ptr, image_data.data() + mip_data_offset, size); diff --git a/Lorr/Engine/Graphics/ImGuiRenderer.cc b/Lorr/Engine/Graphics/ImGuiRenderer.cc index 5ecdbb69..9bc1274c 100644 --- a/Lorr/Engine/Graphics/ImGuiRenderer.cc +++ b/Lorr/Engine/Graphics/ImGuiRenderer.cc @@ -191,10 +191,8 @@ auto ImGuiRenderer::end_frame(this ImGuiRenderer &self, vuk::ValueBytesPerPixel; - auto buffer_size = ls::align_up(upload_pitch * upload_extent.height, buffer_alignment); - auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, buffer_size); + auto upload_buffer = transfer_man.alloc_image_buffer(vuk::Format::eR8G8B8A8Srgb, upload_extent); auto *buffer_ptr = reinterpret_cast(upload_buffer->mapped_ptr); for (auto y = 0_u32; y < upload_extent.height; y++) { auto *pixels = static_cast(texture->GetPixelsAt(upload_offset.x, upload_offset.y + static_cast(y))); diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 69a3930e..86f0f4a2 100644 --- 
a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -286,23 +286,6 @@ auto Device::init_resources(this Device &self) -> std::expected void { self.release(); } -auto TransferManager::alloc_transient_buffer_raw(this TransferManager &self, vuk::MemoryUsage usage, usize size, vuk::source_location LOC) - -> vuk::Buffer { +auto TransferManager::alloc_transient_buffer(this TransferManager &self, vuk::MemoryUsage usage, usize size, vuk::source_location LOC) noexcept + -> vuk::Value { ZoneScoped; - auto read_lock = std::shared_lock(self.mutex); - auto buffer_info = vuk::BufferCreateInfo{ .mem_usage = usage, .size = size, .alignment = self.device->non_coherent_atom_size() }; -#if 1 auto buffer = vuk::Buffer{}; - self.device->allocator->allocate_buffers(std::span{ &buffer, 1 }, std::span{ &buffer_info, 1 }, LOC); - return buffer; -#else - return **vuk::allocate_buffer(*self.device->allocator, buffer_info); -#endif + auto buffer_info = vuk::BufferCreateInfo{ .mem_usage = usage, .size = size, .alignment = self.device->non_coherent_atom_size() }; + self.frame_allocator->allocate_buffers(std::span{ &buffer, 1 }, std::span{ &buffer_info, 1 }, LOC); + + return vuk::acquire_buf("transient buffer", buffer, vuk::Access::eNone, LOC); } -auto TransferManager::alloc_transient_buffer(this TransferManager &self, vuk::MemoryUsage usage, usize size, vuk::source_location LOC) - -> vuk::Value { +auto TransferManager::alloc_image_buffer(this TransferManager &self, vuk::Format format, vuk::Extent3D extent) noexcept -> vuk::Value { ZoneScoped; - auto buffer = self.alloc_transient_buffer_raw(usage, size, LOC); - return vuk::acquire_buf("transient buffer", buffer, vuk::Access::eNone, LOC); + auto write_lock = std::unique_lock(self.mutex); + auto alignment = vuk::format_to_texel_block_size(format); + auto size = vuk::compute_image_size(format, extent); + + auto buffer_handle = vuk::Buffer{}; + auto buffer_info = vuk::BufferCreateInfo{ .mem_usage = 
vuk::MemoryUsage::eCPUtoGPU, .size = size, .alignment = alignment }; + self.device->allocator->allocate_buffers({ &buffer_handle, 1 }, { &buffer_info, 1 }); + + auto buffer = vuk::acquire_buf("image buffer", buffer_handle, vuk::Access::eNone); + self.image_buffers.emplace(buffer); + + return buffer; } -auto TransferManager::upload_staging(this TransferManager &, vuk::Value &&src, vuk::Value &&dst, vuk::source_location LOC) +auto TransferManager::upload(this TransferManager &, vuk::Value &&src, vuk::Value &&dst, vuk::source_location LOC) -> vuk::Value { ZoneScoped; @@ -55,53 +60,30 @@ auto TransferManager::upload_staging(this TransferManager &, vuk::Value &&src, Buffer &dst, u64 dst_offset, vuk::source_location LOC) - -> vuk::Value { - ZoneScoped; - - auto dst_handle = self.device->buffer(dst.id()); - auto dst_buffer = vuk::discard_buf("dst", dst_handle->subrange(dst_offset, src->size), LOC); - return self.upload_staging(std::move(src), std::move(dst_buffer), LOC); -} - -auto TransferManager::upload_staging( - this TransferManager &self, - void *data, - u64 data_size, - vuk::Value &&dst, - u64 dst_offset, - vuk::source_location LOC -) -> vuk::Value { - ZoneScoped; - - auto cpu_buffer = self.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, data_size, LOC); - std::memcpy(cpu_buffer->mapped_ptr, data, data_size); - - auto dst_buffer = vuk::discard_buf("dst", dst->subrange(dst_offset, cpu_buffer->size), LOC); - return self.upload_staging(std::move(cpu_buffer), std::move(dst_buffer), LOC); -} - -auto TransferManager::upload_staging(this TransferManager &self, void *data, u64 data_size, Buffer &dst, u64 dst_offset, vuk::source_location LOC) - -> vuk::Value { - ZoneScoped; - - auto cpu_buffer = self.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, data_size, LOC); - std::memcpy(cpu_buffer->mapped_ptr, data, data_size); - - auto dst_handle = self.device->buffer(dst.id()); - auto dst_buffer = vuk::discard_buf("dst", dst_handle->subrange(dst_offset, cpu_buffer->size), 
LOC); - return self.upload_staging(std::move(cpu_buffer), std::move(dst_buffer), LOC); -} - -auto TransferManager::upload_staging(this TransferManager &self, ImageView &image_view, void *data, u64, vuk::source_location LOC) +[[nodiscard]] +auto TransferManager::upload(this TransferManager &, vuk::Value &&src, vuk::Value &&dst, vuk::source_location LOC) -> vuk::Value { ZoneScoped; - std::shared_lock _(self.mutex); - auto dst_attachment_info = image_view.to_attachment(*self.device, vuk::ImageUsageFlagBits::eTransferDst); - auto result = vuk::host_data_to_image(self.device->allocator.value(), vuk::DomainFlagBits::eGraphicsQueue, dst_attachment_info, data, LOC); - result = vuk::generate_mips(std::move(result), 0, image_view.mip_count() - 1); - return result; + auto upload_pass = vuk::make_pass( + "upload", + [](vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::eTransferRead) src, + VUK_IA(vuk::eTransferWrite) dst) { + auto buffer_copy_region = vuk::BufferImageCopy{ + .bufferOffset = src->offset, + .imageSubresource = { .aspectMask = vuk::ImageAspectFlagBits::eColor, .layerCount = 1 }, + .imageOffset = {}, + .imageExtent = dst->extent, + }; + cmd_list.copy_buffer_to_image(src, dst, buffer_copy_region); + return dst; + }, + vuk::DomainFlagBits::eAny, + LOC + ); + + return upload_pass(std::move(src), std::move(dst)); } auto TransferManager::scratch_buffer(this TransferManager &self, const void *data, u64 size, vuk::source_location LOC) -> vuk::Value { @@ -139,7 +121,7 @@ auto TransferManager::wait_on(this TransferManager &self, vuk::UntypedValue &&fu thread_local vuk::Compiler transfer_man_compiler; fut.wait(self.device->get_allocator(), transfer_man_compiler); #else - std::unique_lock _(self.mutex); + auto write_lock = std::unique_lock(self.mutex); self.futures.push_back(std::move(fut)); #endif } @@ -147,7 +129,7 @@ auto TransferManager::wait_on(this TransferManager &self, vuk::UntypedValue &&fu auto TransferManager::wait_for_ops(this TransferManager &self, vuk::Compiler 
&compiler) -> void { ZoneScoped; - std::unique_lock _(self.mutex); + auto write_lock = std::unique_lock(self.mutex); vuk::wait_for_values_explicit(*self.frame_allocator, compiler, self.futures, {}); self.futures.clear(); } @@ -155,15 +137,28 @@ auto TransferManager::wait_for_ops(this TransferManager &self, vuk::Compiler &co auto TransferManager::acquire(this TransferManager &self, vuk::DeviceSuperFrameResource &super_frame_resource) -> void { ZoneScoped; - std::unique_lock _(self.mutex); + auto write_lock = std::unique_lock(self.mutex); auto &frame_resource = super_frame_resource.get_next_frame(); self.frame_allocator.emplace(frame_resource); + + for (auto it = self.image_buffers.begin(); it != self.image_buffers.end();) { + auto image_buffer = &*it; + if (*image_buffer->poll() == vuk::Signal::Status::eHostAvailable) { + auto evaluated_buffer = vuk::eval(image_buffer->get_head()); + LS_EXPECT(evaluated_buffer.holds_value()); + self.device->allocator->deallocate({ &evaluated_buffer.value(), 1 }); + it = self.image_buffers.erase(it); + continue; + } + + ++it; + } } auto TransferManager::release(this TransferManager &self) -> void { ZoneScoped; - std::unique_lock _(self.mutex); + auto write_lock = std::unique_lock(self.mutex); self.frame_allocator.reset(); } diff --git a/Lorr/Engine/Graphics/VulkanDevice.hh b/Lorr/Engine/Graphics/VulkanDevice.hh index b3ce1c43..ac08f652 100644 --- a/Lorr/Engine/Graphics/VulkanDevice.hh +++ b/Lorr/Engine/Graphics/VulkanDevice.hh @@ -21,6 +21,7 @@ private: mutable std::shared_mutex mutex = {}; std::vector futures = {}; + plf::colony> image_buffers = {}; ls::option frame_allocator; @@ -34,42 +35,25 @@ public: auto destroy(this TransferManager &) -> void; [[nodiscard]] - auto alloc_transient_buffer_raw(this TransferManager &, vuk::MemoryUsage usage, usize size, LR_THISCALL) -> vuk::Buffer; + auto alloc_transient_buffer(this TransferManager &, vuk::MemoryUsage usage, usize size, LR_THISCALL) noexcept -> vuk::Value; [[nodiscard]] - auto 
alloc_transient_buffer(this TransferManager &, vuk::MemoryUsage usage, usize size, LR_THISCALL) -> vuk::Value; + auto alloc_image_buffer(this TransferManager &, vuk::Format format, vuk::Extent3D extent) noexcept -> vuk::Value; [[nodiscard]] - auto upload_staging(this TransferManager &, vuk::Value &&src, vuk::Value &&dst, LR_THISCALL) -> vuk::Value; + auto upload(this TransferManager &, vuk::Value &&src, vuk::Value &&dst, LR_THISCALL) -> vuk::Value; [[nodiscard]] - auto upload_staging(this TransferManager &, vuk::Value &&src, Buffer &dst, u64 dst_offset = 0, LR_THISCALL) - -> vuk::Value; - - [[nodiscard]] - auto upload_staging(this TransferManager &, void *data, u64 data_size, vuk::Value &&dst, u64 dst_offset = 0, LR_THISCALL) - -> vuk::Value; - - [[nodiscard]] - auto upload_staging(this TransferManager &, void *data, u64 data_size, Buffer &dst, u64 dst_offset = 0, LR_THISCALL) -> vuk::Value; - - [[nodiscard]] - auto upload_staging(this TransferManager &, ImageView &image_view, void *data, u64 data_size, LR_THISCALL) -> vuk::Value; - - template - [[nodiscard]] auto upload_staging(this TransferManager &self, ls::span span, Buffer &dst, u64 dst_offset = 0, LR_THISCALL) - -> vuk::Value { - ZoneScoped; - - return self.upload_staging(reinterpret_cast(span.data()), span.size_bytes(), dst, dst_offset, LOC); - } + auto upload(this TransferManager &, vuk::Value &&src, vuk::Value &&dst, LR_THISCALL) + -> vuk::Value; template - [[nodiscard]] auto upload_staging(this TransferManager &self, ls::span span, vuk::Value &&dst, u64 dst_offset = 0, LR_THISCALL) - -> vuk::Value { + [[nodiscard]] auto upload(this TransferManager &self, ls::span span, vuk::Value &&dst, LR_THISCALL) -> vuk::Value { ZoneScoped; - return self.upload_staging(reinterpret_cast(span.data()), span.size_bytes(), std::move(dst), dst_offset, LOC); + auto src = self.alloc_transient_buffer(vuk::MemoryUsage::eCPUtoGPU, span.size_bytes(), LOC); + std::memcpy(src->mapped_ptr, span.data(), span.size_bytes()); + return 
self.upload(std::move(src), std::move(dst), LOC); } template diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 444f4e99..b1891384 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -228,10 +228,11 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in if (!info.dirty_transform_ids.empty()) { auto rebuild_transforms = !self.transforms_buffer || self.transforms_buffer.data_size() <= info.gpu_transforms.size_bytes(); self.transforms_buffer = self.transforms_buffer.resize(device, info.gpu_transforms.size_bytes()).value(); + prepared_frame.transforms_buffer = self.transforms_buffer.acquire(device, "transforms", rebuild_transforms ? vuk::eNone : vuk::eMemoryRead); if (rebuild_transforms) { // If we resize buffer, we need to refill it again, so individual uploads are not required. - prepared_frame.transforms_buffer = transfer_man.upload_staging(info.gpu_transforms, self.transforms_buffer); + prepared_frame.transforms_buffer = transfer_man.upload(info.gpu_transforms, std::move(prepared_frame.transforms_buffer)); } else { // Buffer is not resized, upload individual transforms. 
auto dirty_transforms_count = info.dirty_transform_ids.size(); @@ -266,7 +267,6 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in } ); - prepared_frame.transforms_buffer = self.transforms_buffer.acquire(device, "transforms", vuk::Access::eMemoryRead); prepared_frame.transforms_buffer = update_transforms_pass(std::move(upload_buffer), std::move(prepared_frame.transforms_buffer)); } } else if (self.transforms_buffer) { @@ -276,9 +276,10 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in if (!info.dirty_material_indices.empty()) { auto rebuild_materials = !self.materials_buffer || self.materials_buffer.data_size() <= info.gpu_materials.size_bytes(); self.materials_buffer = self.materials_buffer.resize(device, info.gpu_materials.size_bytes()).value(); + prepared_frame.materials_buffer = self.materials_buffer.acquire(device, "materials", rebuild_materials ? vuk::eNone : vuk::eMemoryRead); if (rebuild_materials) { - prepared_frame.materials_buffer = transfer_man.upload_staging(info.gpu_materials, self.materials_buffer); + prepared_frame.materials_buffer = transfer_man.upload(info.gpu_materials, std::move(prepared_frame.materials_buffer)); } else { // TODO: Literally repeating code, find a solution to this auto dirty_materials_count = info.dirty_material_indices.size(); @@ -311,7 +312,6 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in } ); - prepared_frame.materials_buffer = self.materials_buffer.acquire(device, "materials", vuk::eMemoryRead); prepared_frame.materials_buffer = update_materials_pass(std::move(upload_buffer), std::move(prepared_frame.materials_buffer)); } } else if (self.materials_buffer) { @@ -320,14 +320,16 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in if (!info.gpu_meshes.empty()) { self.meshes_buffer = self.meshes_buffer.resize(device, info.gpu_meshes.size_bytes()).value(); - prepared_frame.meshes_buffer = 
transfer_man.upload_staging(info.gpu_meshes, self.meshes_buffer); + prepared_frame.meshes_buffer = self.meshes_buffer.acquire(device, "meshes", vuk::eNone); + prepared_frame.meshes_buffer = transfer_man.upload(info.gpu_meshes, std::move(prepared_frame.meshes_buffer)); } else if (self.meshes_buffer) { prepared_frame.meshes_buffer = self.meshes_buffer.acquire(device, "meshes", vuk::eMemoryRead); } if (!info.gpu_mesh_instances.empty()) { self.mesh_instances_buffer = self.mesh_instances_buffer.resize(device, info.gpu_mesh_instances.size_bytes()).value(); - prepared_frame.mesh_instances_buffer = transfer_man.upload_staging(info.gpu_mesh_instances, self.mesh_instances_buffer); + prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eNone); + prepared_frame.mesh_instances_buffer = transfer_man.upload(info.gpu_mesh_instances, std::move(prepared_frame.mesh_instances_buffer)); } else if (self.mesh_instances_buffer) { prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eMemoryRead); } diff --git a/Lorr/ls/defer.hh b/Lorr/ls/defer.hh index bbcaba5e..31f09f13 100755 --- a/Lorr/ls/defer.hh +++ b/Lorr/ls/defer.hh @@ -9,7 +9,7 @@ template struct defer { Fn func; - defer(Fn func_): func(std::move(func_)) {} + defer(Fn func_) : func(std::move(func_)) {} ~defer() { func(); From c03faf770163faa059bafe6dd1dabae4963b2dc8 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 29 Aug 2025 23:00:15 +0300 Subject: [PATCH 22/27] fix materials --- .../Engine/Graphics/Vulkan/TransferManager.cc | 11 ++++---- Lorr/Engine/Graphics/VulkanDevice.hh | 2 +- .../Resources/shaders/passes/brdf.slang | 27 ++++++++++--------- Lorr/Engine/Resources/shaders/pbr.slang | 22 +++++++-------- Lorr/Engine/Scene/Scene.cc | 2 +- xmake/packages.lua | 2 +- 6 files changed, 35 insertions(+), 31 deletions(-) diff --git a/Lorr/Engine/Graphics/Vulkan/TransferManager.cc 
b/Lorr/Engine/Graphics/Vulkan/TransferManager.cc index 69f65705..f700879b 100644 --- a/Lorr/Engine/Graphics/Vulkan/TransferManager.cc +++ b/Lorr/Engine/Graphics/Vulkan/TransferManager.cc @@ -21,12 +21,13 @@ auto TransferManager::alloc_transient_buffer(this TransferManager &self, vuk::Me auto buffer = vuk::Buffer{}; auto buffer_info = vuk::BufferCreateInfo{ .mem_usage = usage, .size = size, .alignment = self.device->non_coherent_atom_size() }; - self.frame_allocator->allocate_buffers(std::span{ &buffer, 1 }, std::span{ &buffer_info, 1 }, LOC); + self.frame_allocator->allocate_buffers({ &buffer, 1 }, { &buffer_info, 1 }, LOC); - return vuk::acquire_buf("transient buffer", buffer, vuk::Access::eNone, LOC); + return vuk::acquire_buf("transient buffer", buffer, vuk::eNone, LOC); } -auto TransferManager::alloc_image_buffer(this TransferManager &self, vuk::Format format, vuk::Extent3D extent) noexcept -> vuk::Value { +auto TransferManager::alloc_image_buffer(this TransferManager &self, vuk::Format format, vuk::Extent3D extent, vuk::source_location LOC) noexcept + -> vuk::Value { ZoneScoped; auto write_lock = std::unique_lock(self.mutex); @@ -35,9 +36,9 @@ auto TransferManager::alloc_image_buffer(this TransferManager &self, vuk::Format auto buffer_handle = vuk::Buffer{}; auto buffer_info = vuk::BufferCreateInfo{ .mem_usage = vuk::MemoryUsage::eCPUtoGPU, .size = size, .alignment = alignment }; - self.device->allocator->allocate_buffers({ &buffer_handle, 1 }, { &buffer_info, 1 }); + self.device->allocator->allocate_buffers({ &buffer_handle, 1 }, { &buffer_info, 1 }, LOC); - auto buffer = vuk::acquire_buf("image buffer", buffer_handle, vuk::Access::eNone); + auto buffer = vuk::acquire_buf("image buffer", buffer_handle, vuk::eNone, LOC); self.image_buffers.emplace(buffer); return buffer; diff --git a/Lorr/Engine/Graphics/VulkanDevice.hh b/Lorr/Engine/Graphics/VulkanDevice.hh index ac08f652..5b53f689 100644 --- a/Lorr/Engine/Graphics/VulkanDevice.hh +++ 
b/Lorr/Engine/Graphics/VulkanDevice.hh @@ -38,7 +38,7 @@ public: auto alloc_transient_buffer(this TransferManager &, vuk::MemoryUsage usage, usize size, LR_THISCALL) noexcept -> vuk::Value; [[nodiscard]] - auto alloc_image_buffer(this TransferManager &, vuk::Format format, vuk::Extent3D extent) noexcept -> vuk::Value; + auto alloc_image_buffer(this TransferManager &, vuk::Format format, vuk::Extent3D extent, LR_THISCALL) noexcept -> vuk::Value; [[nodiscard]] auto upload(this TransferManager &, vuk::Value &&src, vuk::Value &&dst, LR_THISCALL) -> vuk::Value; diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang b/Lorr/Engine/Resources/shaders/passes/brdf.slang index 34584812..3bd6a67f 100644 --- a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ b/Lorr/Engine/Resources/shaders/passes/brdf.slang @@ -50,12 +50,12 @@ func fs_main(VertexOutput input) -> f32x4 { let world_position = world_position_h.xyz / world_position_h.w; // PBR constants - const f32x3 V = normalize(params.camera.position - world_position); - const f32x3 L = normalize(params.environment.sun_direction); // temp - const f32x3 N = mapped_normal; + let V = normalize(params.camera.position - world_position); + let L = normalize(params.environment.sun_direction); // temp + let N = normalize(mapped_normal); var sun_illuminance = f32x3(1.0); - var sky_luminance = f32x3(1.0); + var sky_luminance = f32x3(0.1); if (params.environment.flags & (EnvironmentFlags::HasSun | EnvironmentFlags::HasAtmosphere)) { // SUN LIGHT COLOR ────────────────────────────────────────────────── var eye_altitude = max(world_position.y, 0.0) * CAMERA_SCALE_UNIT; @@ -89,21 +89,24 @@ func fs_main(VertexOutput input) -> f32x4 { sky_luminance = std::rec709_oetf(sky_result.luminance) * eye_gradient; } - f32x3 ambient_contribution = sky_luminance * albedo_color * occlusion; + let ambient_contribution = sky_luminance * albedo_color * occlusion; // MATERIAL COLOR ─────────────────────────────────────────────────── // 
https://marmosetco.tumblr.com/post/81245981087 - const f32x3 R = reflect(-V, N); - const f32 horizon_fade = 1.3; - f32 horizon = saturate(1.0 + horizon_fade * dot(R, smooth_normal)); + let R = reflect(-V, N); + let horizon_fade = 1.3; + var horizon = saturate(1.0 + horizon_fade * dot(R, smooth_normal)); horizon *= horizon; - const f32 NoL = max(dot(N, L), 0.0); - f32x3 brdf = BRDF(V, N, L, albedo_color, roughness, metallic); - f32x3 material_surface_color = brdf * horizon * sun_illuminance * NoL; + var material_surface_color = f32x3(0.0); + let NoL = max(dot(N, L), 0.0); + if (NoL > 0.0) { + let brdf = BRDF(V, N, L, albedo_color, roughness, metallic); + material_surface_color = brdf * horizon * sun_illuminance * NoL; + } // FINAL ──────────────────────────────────────────────────────────── - f32x3 final_color = material_surface_color + ambient_contribution + emission; + let final_color = material_surface_color + ambient_contribution + emission; return f32x4(final_color, 1.0); } diff --git a/Lorr/Engine/Resources/shaders/pbr.slang b/Lorr/Engine/Resources/shaders/pbr.slang index 72575bf3..986ad988 100644 --- a/Lorr/Engine/Resources/shaders/pbr.slang +++ b/Lorr/Engine/Resources/shaders/pbr.slang @@ -45,22 +45,22 @@ public func BRDF(f32x3 V, f32x3 N, f32x3 L, f32x3 albedo, f32 roughness, f32 met f32 NoH = max(dot(N, H), 0.0); f32 LoH = max(dot(L, H), 0.0); - const f32 reflectance = 0.5; - f32x3 F0 = 0.16 * reflectance * reflectance * (1.0 - metallic) + albedo * metallic; + let reflectance = 0.04; + let F0 = 0.16 * reflectance * reflectance * (1.0 - metallic) + albedo * metallic; // Microfacet - f32 roughness2 = roughness * roughness; - f32 D = D_GGX(NoH, roughness2); - f32x3 G2 = Smith_G2_Height_Correlated_GGX_Lagarde(NoV, NoL, roughness2); - f32x3 F = F_Schlick(LoH, F0); - - f32x3 comp = GGX_energy_compensation(NoV, roughness2, F0); + let roughness2 = roughness * roughness; + let D = D_GGX(NoH, roughness2); + let G2 = Smith_G2_Height_Correlated_GGX_Lagarde(NoV, NoL, 
roughness2); + let F = F_Schlick(LoH, F0); + let J = GGX_energy_compensation(NoV, roughness2, F0); // Reflectance (Cook-Torrance) // V already divided by denominator - f32x3 specular = F * D * G2; + let specular = F * D * G2; // Diffuse - f32x3 diffuse = (1.0 - metallic) * albedo * Fd_Lambert(); + let kd = (1.0 - metallic) * (1.0 - F); + let diffuse = kd * albedo * Fd_Lambert(); // Frensel combination - return diffuse + specular * comp; + return (diffuse + specular) * J; } diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 603aeabf..097207db 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -717,7 +717,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< auto dirty_index = SlotMap_decode_id(dirty_id).index; dirty_material_indices.push_back(dirty_index); - if (dirty_index <= self.gpu_materials.size()) { + if (dirty_index >= self.gpu_materials.size()) { self.gpu_materials.resize(dirty_index + 1, {}); } diff --git a/xmake/packages.lua b/xmake/packages.lua index a73b9827..816d5756 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -62,6 +62,6 @@ add_requires("vuk 2025.07.09", { configs = { }, debug = is_mode("debug") }) add_requires("meshoptimizer v0.24") -add_requires("ktx v4.4.0", { debug = true }) +add_requires("ktx v4.4.0", { debug = is_plat("windows") }) add_requires("svector v1.0.3") From 9fd7d0b0f78964593810904726851dafda64425b Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 31 Aug 2025 16:20:03 +0300 Subject: [PATCH 23/27] two pass occlusion culling for meshes (again, with delay) --- .../shaders/passes/cull_meshes.slang | 123 ++++++++++----- .../shaders/passes/cull_meshlets.slang | 27 +--- .../passes/generate_cull_commands.slang | 3 +- .../Resources/shaders/passes/tonemap.slang | 4 +- Lorr/Engine/Scene/SceneRenderer.cc | 148 +++++++++++++++--- Lorr/Engine/Scene/SceneRenderer.hh | 5 +- 6 files changed, 223 insertions(+), 87 
deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index 13e065df..3840e418 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -4,16 +4,20 @@ import scene; import cull; import debug_drawer; +[[vk::constant_id(0)]] const u32 LATE = 0; [[vk::binding(0)]] ConstantBuffer camera; [[vk::binding(1)]] StructuredBuffer meshes; [[vk::binding(2)]] StructuredBuffer transforms; -[[vk::binding(3)]] RWStructuredBuffer mesh_instances; -[[vk::binding(4)]] RWStructuredBuffer meshlet_instances; -[[vk::binding(5)]] RWStructuredBuffer visible_meshlet_instances_count; -[[vk::binding(6)]] RWStructuredBuffer debug_drawer; +[[vk::binding(3)]] Image2D hiz_image; +[[vk::binding(4)]] Sampler hiz_sampler; +[[vk::binding(5)]] RWStructuredBuffer mesh_instances; +[[vk::binding(6)]] RWStructuredBuffer meshlet_instances; +[[vk::binding(7)]] RWStructuredBuffer mesh_instance_visibility_mask; +[[vk::binding(8)]] RWStructuredBuffer visible_meshlet_instances_count; +[[vk::binding(9)]] RWStructuredBuffer debug_drawer; #ifndef CULLING_MESHES_COUNT - #define CULLING_MESHES_COUNT 64 +#define CULLING_MESHES_COUNT 64 #endif [[shader("compute")]] @@ -28,53 +32,96 @@ func cs_main( return; } + let mask_index = mesh_instance_index / 32; + let bit_index = mesh_instance_index - mask_index * 32; + let visibility_bit = 1 << bit_index; + let was_visible = (mesh_instance_visibility_mask[mask_index] & visibility_bit) != 0; + let mesh_instance = &mesh_instances[mesh_instance_index]; let mesh = meshes[mesh_instance.mesh_index]; let transform = transforms[mesh_instance.transform_index]; let mvp = mul(camera.projection_view_mat, transform.world); let cull_frustum = (cull_flags & CullFlags::MeshFrustum) != 0; + let cull_occlusion = (cull_flags & CullFlags::MeshOcclusion) != 0; - var visible = true; - visible = visible && cull_frustum && test_frustum(mvp, 
mesh.bounds.aabb_center, mesh.bounds.aabb_extent); + var visible = (LATE == 0) ? was_visible : true; + if (visible && cull_frustum) { + visible = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); + } - var lod_index = 0; + if (LATE == 1 && visible && cull_occlusion) { + if (let screen_aabb = project_aabb(mvp, camera.near_clip, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, true); + if (visible && true) { + let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; + let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; + var debug_rect = DebugRect(); + debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); + debug_rect.extent = ndc_aabb_max - ndc_aabb_min; + debug_rect.color = f32x3(1.0, 0.0, 1.0); + debug_rect.coord = DebugDrawCoord::NDC; + debug_draw_rect(debug_drawer[0], debug_rect); + } + } + } + + if (visible && (LATE == 0 || !was_visible)) { + var lod_index = 0; #if 1 - // Credits: - // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 - let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; - let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; - let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; - let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; - let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); - let aabb_rough_camera_distance = max(length(aabb_center - camera.position) - 0.5 * aabb_rough_extent, 0.0); + // Credits: + // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 + let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; + let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; + let aabb_extent_y = length(transform.world[1]) * 
mesh.bounds.aabb_extent.y; + let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; + let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); + let aabb_rough_camera_distance = max(length(aabb_center - camera.position) - 0.5 * aabb_rough_extent, 0.0); - // Avoiding the atan here - let rough_resolution = max(camera.resolution.x, camera.resolution.y); - let fov90_distance_to_screen_ratio = 2.0f; - let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; - let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); - let rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; + // Avoiding the atan here + let rough_resolution = max(camera.resolution.x, camera.resolution.y); + let fov90_distance_to_screen_ratio = 2.0f; + let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; + let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); + let rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; - for (var i = 1; i < mesh.lod_count; i++) { - let mesh_lod = mesh.lods[i]; - let rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; - if (rough_pixel_error < camera.acceptable_lod_error) { - lod_index = i; + for (var i = 1; i < mesh.lod_count; i++) { + let mesh_lod = mesh.lods[i]; + let rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; + if (rough_pixel_error < camera.acceptable_lod_error) { + lod_index = i; + } else { + break; + } + } +#endif + mesh_instance.lod_index = lod_index; + let mesh_lod = mesh.lods[lod_index]; + let meshlet_count = mesh_lod.meshlet_count; + + var base_meshlet_instance_offset = 0; + if (LATE == 0) { + base_meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[0], meshlet_count, MemoryOrder::Relaxed); } else { - break; + let early_count = visible_meshlet_instances_count[0]; + let late_offset = __atomic_add(visible_meshlet_instances_count[1], meshlet_count, MemoryOrder::Relaxed); + 
base_meshlet_instance_offset = early_count + late_offset; + } + + for (u32 i = 0; i < meshlet_count; i++) { + let offset = base_meshlet_instance_offset + i; + var meshlet_instance = MeshletInstance(); + meshlet_instance.mesh_instance_index = mesh_instance_index; + meshlet_instance.meshlet_index = i; + meshlet_instances[offset] = meshlet_instance; } } -#endif - mesh_instance.lod_index = lod_index; - let mesh_lod = mesh.lods[lod_index]; - let meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[0], mesh_lod.meshlet_count, MemoryOrder::Relaxed); - for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { - let offset = meshlet_instance_offset + i; - var meshlet_instance = MeshletInstance(); - meshlet_instance.mesh_instance_index = mesh_instance_index; - meshlet_instance.meshlet_index = i; - meshlet_instances[offset] = meshlet_instance; + if (LATE == 1) { + if (visible) { + __atomic_or(mesh_instance_visibility_mask[mask_index], visibility_bit, MemoryOrder::Relaxed); + } else { + __atomic_and(mesh_instance_visibility_mask[mask_index], ~visibility_bit, MemoryOrder::Relaxed); + } } } diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index c0e078d1..b99efbd8 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -6,6 +6,7 @@ import debug_drawer; #include +[[vk::constant_id(0)]] const u32 LATE = 0; [[vk::binding(0)]] ConstantBuffer camera; [[vk::binding(1)]] StructuredBuffer meshlet_instances; [[vk::binding(2)]] StructuredBuffer mesh_instances; @@ -28,12 +29,16 @@ func cs_main( uint3 thread_id : SV_DispatchThreadID, uniform CullFlags cull_flags ) -> void { - let meshlet_instance_count = visible_meshlet_instances_count[0]; - let meshlet_instance_index = thread_id.x; + let meshlet_instance_count = visible_meshlet_instances_count[LATE]; + var meshlet_instance_index = thread_id.x; if (meshlet_instance_index >= 
meshlet_instance_count) { return; } + if (LATE == 1) { + meshlet_instance_index += visible_meshlet_instances_count[0]; + } + let meshlet_instance = meshlet_instances[meshlet_instance_index]; let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; let mesh = meshes[mesh_instance.mesh_index]; @@ -50,24 +55,6 @@ func cs_main( visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); } - if (visible && cull_occlusion) { - let mvp_prev = mul(camera.prev_projection_view_mat, transform.world); - if (let screen_aabb = project_aabb(mvp_prev, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, false); - - if (visible && true) { - let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; - let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; - var debug_rect = DebugRect(); - debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); - debug_rect.extent = ndc_aabb_max - ndc_aabb_min; - debug_rect.color = f32x3(1.0, 0.0, 0.0); - debug_rect.coord = DebugDrawCoord::NDC; - debug_draw_rect(debug_drawer[0], debug_rect); - } - } - } - if (visible) { let index = __atomic_add(cull_triangles_cmd[0].x, 1, MemoryOrder::Relaxed); visible_meshlet_instances_indices[index] = meshlet_instance_index; diff --git a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang index bb9814cf..ed3600df 100644 --- a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang +++ b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang @@ -1,11 +1,12 @@ import std; import gpu; +[[vk::constant_id(0)]] const u32 LATE = 0; [[vk::binding(0)]] StructuredBuffer visible_meshlet_instances_count; [[vk::binding(1)]] RWStructuredBuffer cull_meshlets_cmd; [[shader("compute")]] [[numthreads(1, 1, 1)]] func cs_main() -> void { - cull_meshlets_cmd[0].x = (visible_meshlet_instances_count[0] + 
(CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; + cull_meshlets_cmd[0].x = (visible_meshlet_instances_count[LATE] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; } diff --git a/Lorr/Engine/Resources/shaders/passes/tonemap.slang b/Lorr/Engine/Resources/shaders/passes/tonemap.slang index 13900b34..bab2310c 100644 --- a/Lorr/Engine/Resources/shaders/passes/tonemap.slang +++ b/Lorr/Engine/Resources/shaders/passes/tonemap.slang @@ -11,7 +11,7 @@ struct ShaderParameters { Image2D input_image; ConstantBuffer environment; - ConstantBuffer histogram_luminance; + StructuredBuffer histogram_luminance; }; uniform ParameterBlock params; @@ -554,7 +554,7 @@ struct GT7ToneMapping f32x4 fs_main(VertexOutput input) { f32x3 color = params.input_image.SampleLevel(params.sampler, input.tex_coord, 0.0).rgb; if (params.environment.flags & EnvironmentFlags::HasEyeAdaptation) { - let exposure = params.histogram_luminance.exposure; + let exposure = params.histogram_luminance[0].exposure; color = color * (exposure + 1.0); } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index b1891384..b00b060b 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -5,6 +5,7 @@ #include "Engine/Core/App.hh" #include "Engine/Graphics/VulkanDevice.hh" +#include "Engine/Memory/Stack.hh" namespace lr { enum BindlessDescriptorLayout : u32 { @@ -330,8 +331,17 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in self.mesh_instances_buffer = self.mesh_instances_buffer.resize(device, info.gpu_mesh_instances.size_bytes()).value(); prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eNone); prepared_frame.mesh_instances_buffer = transfer_man.upload(info.gpu_mesh_instances, std::move(prepared_frame.mesh_instances_buffer)); + + auto mesh_instance_visibility_mask_size_bytes = (info.mesh_instance_count + 31) / 32 * sizeof(u32); + 
self.mesh_instance_visibility_mask_buffer = + self.mesh_instance_visibility_mask_buffer.resize(device, mesh_instance_visibility_mask_size_bytes).value(); + prepared_frame.mesh_instance_visibility_mask_buffer = + self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eNone); + prepared_frame.mesh_instance_visibility_mask_buffer = zero_fill_pass(std::move(prepared_frame.mesh_instance_visibility_mask_buffer)); } else if (self.mesh_instances_buffer) { prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eMemoryRead); + prepared_frame.mesh_instance_visibility_mask_buffer = + self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eMemoryRead); } info.environment.transmittance_lut_size = self.sky_transmittance_lut_view.extent(); @@ -403,28 +413,34 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in } static auto cull_meshes( + bool late, GPU::CullFlags cull_flags, u32 mesh_instance_count, TransferManager &transfer_man, vuk::Value &meshes_buffer, vuk::Value &mesh_instances_buffer, + vuk::Value &mesh_instance_visibility_mask_buffer, vuk::Value &meshlet_instances_buffer, vuk::Value &visible_meshlet_instances_count_buffer, vuk::Value &transforms_buffer, + vuk::Value &hiz_attachment, vuk::Value &camera_buffer, vuk::Value &debug_drawer_buffer ) -> vuk::Value { ZoneScoped; + memory::ScopedStack stack; auto vis_cull_meshes_pass = vuk::make_pass( - "vis cull meshes", - [mesh_instance_count, cull_flags]( + stack.format("vis cull meshes {}", late ? 
"late" : "early"), + [late, cull_flags, mesh_instance_count]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eComputeRead) camera, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, + VUK_IA(vuk::eComputeSampled) hiz, VUK_BA(vuk::eComputeRW) mesh_instances, VUK_BA(vuk::eComputeRW) meshlet_instances, + VUK_BA(vuk::eComputeRW) mesh_instance_visibility_mask, VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, VUK_BA(vuk::eComputeRW) debug_drawer ) { @@ -433,14 +449,28 @@ static auto cull_meshes( .bind_buffer(0, 0, camera) .bind_buffer(0, 1, meshes) .bind_buffer(0, 2, transforms) - .bind_buffer(0, 3, mesh_instances) - .bind_buffer(0, 4, meshlet_instances) - .bind_buffer(0, 5, visible_meshlet_instances_count) - .bind_buffer(0, 6, debug_drawer) + .bind_image(0, 3, hiz) + .bind_sampler(0, 4, hiz_sampler_info) + .bind_buffer(0, 5, mesh_instances) + .bind_buffer(0, 6, meshlet_instances) + .bind_buffer(0, 7, mesh_instance_visibility_mask) + .bind_buffer(0, 8, visible_meshlet_instances_count) + .bind_buffer(0, 9, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) + .specialize_constants(0, late) .dispatch_invocations(mesh_instance_count); - return std::make_tuple(camera, meshes, transforms, mesh_instances, meshlet_instances, visible_meshlet_instances_count, debug_drawer); + return std::make_tuple( + camera, + meshes, + transforms, + hiz, + mesh_instances, + meshlet_instances, + mesh_instance_visibility_mask, + visible_meshlet_instances_count, + debug_drawer + ); } ); @@ -448,8 +478,10 @@ static auto cull_meshes( camera_buffer, meshes_buffer, transforms_buffer, + hiz_attachment, mesh_instances_buffer, meshlet_instances_buffer, + mesh_instance_visibility_mask_buffer, visible_meshlet_instances_count_buffer, debug_drawer_buffer ) = @@ -457,21 +489,26 @@ static auto cull_meshes( std::move(camera_buffer), std::move(meshes_buffer), std::move(transforms_buffer), + std::move(hiz_attachment), 
std::move(mesh_instances_buffer), std::move(meshlet_instances_buffer), + std::move(mesh_instance_visibility_mask_buffer), std::move(visible_meshlet_instances_count_buffer), std::move(debug_drawer_buffer) ); auto generate_cull_commands_pass = vuk::make_pass( - "generate cull commands", - [](vuk::CommandBuffer &cmd_list, // - VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRW) cull_meshlets_cmd) { + stack.format("generate cull commands {}", late ? "late" : "early"), + [late]( + vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) cull_meshlets_cmd + ) { cmd_list // .bind_compute_pipeline("passes.generate_cull_commands") .bind_buffer(0, 0, visible_meshlet_instances_count) .bind_buffer(0, 1, cull_meshlets_cmd) + .specialize_constants(0, late) .dispatch(1); return std::make_tuple(visible_meshlet_instances_count, cull_meshlets_cmd); @@ -486,6 +523,7 @@ static auto cull_meshes( } static auto cull_meshlets( + bool late, GPU::CullFlags cull_flags, TransferManager &transfer_man, vuk::Value &hiz_attachment, @@ -501,11 +539,12 @@ static auto cull_meshlets( vuk::Value &debug_drawer_buffer ) -> vuk::Value { ZoneScoped; + memory::ScopedStack stack; // ── CULL MESHLETS ─────────────────────────────────────────────────── auto vis_cull_meshlets_pass = vuk::make_pass( - "vis cull meshlets", - [cull_flags]( + stack.format("vis cull meshlets {}", late ? 
"late" : "early"), + [late, cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) dispatch_cmd, VUK_BA(vuk::eComputeRead) camera, @@ -519,7 +558,7 @@ static auto cull_meshlets( VUK_BA(vuk::eComputeRW) cull_triangles_cmd, VUK_BA(vuk::eComputeRW) debug_drawer ) { - cmd_list.image_barrier(hiz, vuk::eComputeRW, vuk::eComputeSampled, 0, hiz->level_count); + // cmd_list.image_barrier(hiz, vuk::eComputeRW, vuk::eComputeSampled, 0, hiz->level_count); cmd_list // .bind_compute_pipeline("passes.cull_meshlets") @@ -535,6 +574,7 @@ static auto cull_meshlets( .bind_buffer(0, 9, cull_triangles_cmd) .bind_buffer(0, 10, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) + .specialize_constants(0, late) .dispatch_indirect(dispatch_cmd); return std::make_tuple( @@ -584,7 +624,7 @@ static auto cull_meshlets( // ── CULL TRIANGLES ────────────────────────────────────────────────── auto vis_cull_triangles_pass = vuk::make_pass( - "vis cull triangles", + stack.format("vis cull triangles {}", late ? "late" : "early"), [cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, @@ -651,6 +691,7 @@ static auto cull_meshlets( } static auto draw_visbuffer( + bool late, vuk::PersistentDescriptorSet &descriptor_set, vuk::Value &depth_attachment, vuk::Value &visbuffer_attachment, @@ -665,9 +706,10 @@ static auto draw_visbuffer( vuk::Value &camera_buffer ) -> void { ZoneScoped; + memory::ScopedStack stack; auto vis_encode_pass = vuk::make_pass( - "vis encode", + stack.format("vis encode {}", late ? 
"late" : "early"), [&descriptor_set]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) triangle_indirect, @@ -783,7 +825,7 @@ static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Valu "hiz generate slow", [](vuk::CommandBuffer &cmd_list, // VUK_IA(vuk::eComputeSampled) src, - VUK_IA(vuk::eComputeRW) dst) { + VUK_IA(vuk::eComputeWrite) dst) { auto extent = dst->extent; auto mip_count = dst->level_count; @@ -1139,6 +1181,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value({}); - auto visible_meshlet_instances_count_buffer = transfer_man.scratch_buffer(0_u32); - auto cull_meshlets_cmd_buffer = cull_meshes( + auto early_cull_meshlets_cmd_buffer = cull_meshes( + false, info.cull_flags, frame.mesh_instance_count, transfer_man, meshes_buffer, mesh_instances_buffer, + mesh_instance_visibility_mask_buffer, meshlet_instances_buffer, visible_meshlet_instances_count_buffer, transforms_buffer, + hiz_attachment, camera_buffer, debug_drawer_buffer ); - auto draw_command_buffer = cull_meshlets( + auto early_draw_visbuffer_cmd_buffer = cull_meshlets( + false, info.cull_flags, transfer_man, hiz_attachment, - cull_meshlets_cmd_buffer, + early_cull_meshlets_cmd_buffer, visible_meshlet_instances_count_buffer, visible_meshlet_instances_indices_buffer, reordered_indices_buffer, @@ -1180,11 +1227,12 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value void { self.meshes_buffer = {}; } + if (self.mesh_instance_visibility_mask_buffer) { + device.destroy(self.mesh_instance_visibility_mask_buffer.id()); + self.mesh_instance_visibility_mask_buffer = {}; + } + if (self.materials_buffer) { device.destroy(self.materials_buffer.id()); self.materials_buffer = {}; diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 3eeb3af1..e3f25a30 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -30,6 +30,7 @@ struct PreparedFrame { vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = 
{}; vuk::Value mesh_instances_buffer = {}; + vuk::Value mesh_instance_visibility_mask_buffer = {}; vuk::Value materials_buffer = {}; vuk::Value environment_buffer = {}; vuk::Value camera_buffer = {}; @@ -54,11 +55,10 @@ struct SceneRenderer { Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; + Buffer mesh_instance_visibility_mask_buffer = {}; Buffer materials_buffer = {}; - // Then what are they? - // TODO: Per scene sky settings Image sky_transmittance_lut = {}; ImageView sky_transmittance_lut_view = {}; Image sky_multiscatter_lut = {}; @@ -74,7 +74,6 @@ struct SceneRenderer { auto init(this SceneRenderer &) -> bool; auto destroy(this SceneRenderer &) -> void; - // Scene auto prepare_frame(this SceneRenderer &, FramePrepareInfo &info) -> PreparedFrame; auto render(this SceneRenderer &, vuk::Value &&dst_attachment, SceneRenderInfo &render_info, PreparedFrame &frame) -> vuk::Value; From 644b223f0e0b9f16250ef6a3ece18bb2ea87dcfc Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:28:18 +0300 Subject: [PATCH 24/27] fix syncvals --- .../shaders/passes/cull_meshes.slang | 116 ++++++++++-------- .../Resources/shaders/passes/tonemap.slang | 4 +- Lorr/Engine/Scene/SceneRenderer.cc | 14 ++- Lorr/Runtime/main.cc | 2 +- xmake/packages.lua | 2 +- xmake/repo/packages/v/vuk/port/xmake.lua | 81 ++++++++++++ xmake/repo/packages/v/vuk/xmake.lua | 58 +++++++++ 7 files changed, 220 insertions(+), 57 deletions(-) create mode 100644 xmake/repo/packages/v/vuk/port/xmake.lua create mode 100644 xmake/repo/packages/v/vuk/xmake.lua diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index 3840e418..3fd09c6c 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -44,31 +44,80 @@ func cs_main( let cull_frustum = (cull_flags & CullFlags::MeshFrustum) != 0; let cull_occlusion = 
(cull_flags & CullFlags::MeshOcclusion) != 0; + let lod_index = 0; - var visible = (LATE == 0) ? was_visible : true; - if (visible && cull_frustum) { - visible = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); - } + if (LATE == 0) { + var visible = was_visible; + if (visible) { + visible = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); + } + + let should_draw = visible; + if (should_draw) { + mesh_instance.lod_index = lod_index; + let mesh_lod = mesh.lods[lod_index]; + let meshlet_count = mesh_lod.meshlet_count; + let base_meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[0], meshlet_count, MemoryOrder::Relaxed); + for (u32 i = 0; i < meshlet_count; i++) { + let offset = base_meshlet_instance_offset + i; + var meshlet_instance = MeshletInstance(); + meshlet_instance.mesh_instance_index = mesh_instance_index; + meshlet_instance.meshlet_index = i; + meshlet_instances[offset] = meshlet_instance; + } + } + } else { + var visible = true; + + var in_frustum = true; + if (visible) { + in_frustum = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); + } + + visible = visible && in_frustum; + + var passed_occlusion = true; + if (visible) { + if (let screen_aabb = project_aabb(mvp, camera.near_clip, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { + passed_occlusion = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, true); + if (passed_occlusion && true) { + let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; + let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; + var debug_rect = DebugRect(); + debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); + debug_rect.extent = ndc_aabb_max - ndc_aabb_min; + debug_rect.color = f32x3(1.0, 0.0, 1.0); + debug_rect.coord = DebugDrawCoord::NDC; + debug_draw_rect(debug_drawer[0], debug_rect); + } + } + } - if (LATE == 1 && visible && cull_occlusion) { - if (let screen_aabb = project_aabb(mvp, camera.near_clip, 
mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, true); - if (visible && true) { - let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; - let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; - var debug_rect = DebugRect(); - debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); - debug_rect.extent = ndc_aabb_max - ndc_aabb_min; - debug_rect.color = f32x3(1.0, 0.0, 1.0); - debug_rect.coord = DebugDrawCoord::NDC; - debug_draw_rect(debug_drawer[0], debug_rect); + let fallback = visible && !passed_occlusion && !was_visible; + visible = in_frustum && (passed_occlusion || fallback); + let should_draw = visible && !was_visible; + if (should_draw) { + mesh_instance.lod_index = lod_index; + let mesh_lod = mesh.lods[lod_index]; + let meshlet_count = mesh_lod.meshlet_count; + let base_meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[0], meshlet_count, MemoryOrder::Relaxed); + for (u32 i = 0; i < meshlet_count; i++) { + let offset = base_meshlet_instance_offset + i; + var meshlet_instance = MeshletInstance(); + meshlet_instance.mesh_instance_index = mesh_instance_index; + meshlet_instance.meshlet_index = i; + meshlet_instances[offset] = meshlet_instance; } } + + if (in_frustum && passed_occlusion) { + __atomic_or(mesh_instance_visibility_mask[mask_index], visibility_bit, MemoryOrder::Relaxed); + } else { + __atomic_and(mesh_instance_visibility_mask[mask_index], ~visibility_bit, MemoryOrder::Relaxed); + } } - if (visible && (LATE == 0 || !was_visible)) { - var lod_index = 0; -#if 1 +#if 0 // Credits: // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; @@ -95,33 +144,4 @@ func cs_main( } } #endif - mesh_instance.lod_index = lod_index; - let mesh_lod = mesh.lods[lod_index]; - let meshlet_count = 
mesh_lod.meshlet_count; - - var base_meshlet_instance_offset = 0; - if (LATE == 0) { - base_meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[0], meshlet_count, MemoryOrder::Relaxed); - } else { - let early_count = visible_meshlet_instances_count[0]; - let late_offset = __atomic_add(visible_meshlet_instances_count[1], meshlet_count, MemoryOrder::Relaxed); - base_meshlet_instance_offset = early_count + late_offset; - } - - for (u32 i = 0; i < meshlet_count; i++) { - let offset = base_meshlet_instance_offset + i; - var meshlet_instance = MeshletInstance(); - meshlet_instance.mesh_instance_index = mesh_instance_index; - meshlet_instance.meshlet_index = i; - meshlet_instances[offset] = meshlet_instance; - } - } - - if (LATE == 1) { - if (visible) { - __atomic_or(mesh_instance_visibility_mask[mask_index], visibility_bit, MemoryOrder::Relaxed); - } else { - __atomic_and(mesh_instance_visibility_mask[mask_index], ~visibility_bit, MemoryOrder::Relaxed); - } - } } diff --git a/Lorr/Engine/Resources/shaders/passes/tonemap.slang b/Lorr/Engine/Resources/shaders/passes/tonemap.slang index bab2310c..13900b34 100644 --- a/Lorr/Engine/Resources/shaders/passes/tonemap.slang +++ b/Lorr/Engine/Resources/shaders/passes/tonemap.slang @@ -11,7 +11,7 @@ struct ShaderParameters { Image2D input_image; ConstantBuffer environment; - StructuredBuffer histogram_luminance; + ConstantBuffer histogram_luminance; }; uniform ParameterBlock params; @@ -554,7 +554,7 @@ struct GT7ToneMapping f32x4 fs_main(VertexOutput input) { f32x3 color = params.input_image.SampleLevel(params.sampler, input.tex_coord, 0.0).rgb; if (params.environment.flags & EnvironmentFlags::HasEyeAdaptation) { - let exposure = params.histogram_luminance[0].exposure; + let exposure = params.histogram_luminance.exposure; color = color * (exposure + 1.0); } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index b00b060b..10b084ff 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc 
+++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -825,7 +825,7 @@ static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Valu "hiz generate slow", [](vuk::CommandBuffer &cmd_list, // VUK_IA(vuk::eComputeSampled) src, - VUK_IA(vuk::eComputeWrite) dst) { + VUK_IA(vuk::eComputeRW) dst) { auto extent = dst->extent; auto mip_count = dst->level_count; @@ -841,7 +841,9 @@ static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Valu if (i == 0) { cmd_list.bind_image(0, 1, src); } else { - cmd_list.bind_image(0, 1, dst->mip(i - 1)); + auto mip = dst->mip(i - 1); + cmd_list.image_barrier(mip, vuk::eComputeWrite, vuk::eComputeSampled); + cmd_list.bind_image(0, 1, mip); } cmd_list.bind_image(0, 2, mip); @@ -849,6 +851,8 @@ static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Valu cmd_list.dispatch_invocations(mip_width, mip_height); } + cmd_list.image_barrier(dst, vuk::eComputeSampled, vuk::eComputeRW); + return std::make_tuple(src, dst); } ); @@ -1342,7 +1346,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value(3) + .module(1) .module(window_info) .module() .module() diff --git a/xmake/packages.lua b/xmake/packages.lua index 816d5756..9aba045a 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -56,7 +56,7 @@ add_requires("flecs v4.0.4") add_requires("libsdl3") add_requires("shader-slang v2025.15") -add_requires("vuk 2025.07.09", { configs = { +add_requires("vuk 2025.09.01", { configs = { debug_allocations = false, disable_exceptions = false, }, debug = is_mode("debug") }) diff --git a/xmake/repo/packages/v/vuk/port/xmake.lua b/xmake/repo/packages/v/vuk/port/xmake.lua new file mode 100644 index 00000000..47e1c2c6 --- /dev/null +++ b/xmake/repo/packages/v/vuk/port/xmake.lua @@ -0,0 +1,81 @@ +add_rules("mode.release", "mode.debug") +set_project("vuk") + +add_requires("fmt 11.1.4", { system = false, configs = { header_only = false } }) +add_requires("vulkan-memory-allocator v3.1.0") +add_requires("concurrentqueue v1.0.4") +add_requires("plf_colony v7.41") 
+add_requires("robin-hood-hashing 3.11.5") +add_requires("stb 2024.06.01") +add_requires("function2 4.2.4") +add_requires("spirv-cross 1.4.309+0") +add_requires("small_vector 2024.12.23") +add_requires("vk-bootstrap v1.4.307") + +option("debug_allocations") + set_default(false) + set_showmenu(true) + add_defines("VUK_DEBUG_ALLOCATIONS=1", { force = true, public = true }) + +option("disable_exceptions") + set_default(false) + set_showmenu(true) + add_defines("VUK_DISABLE_EXCEPTIONS=1", { force = true, public = true }) + +target("vuk") + set_kind("static") + add_languages("cxx20") + add_includedirs("include/", { public = true }) + + add_files("src/*.cpp") + add_files("src/runtime/**.cpp") + + set_options("debug_allocations", "disable_exceptions") + + -- public packages + add_packages( + "fmt", + "robin-hood-hashing", + "plf_colony", + "function2", + "small_vector", + "vulkan-memory-allocator", + "vk-bootstrap", + { public = true }) + + -- private packages + add_packages( + "concurrentqueue", + "stb", + "spirv-cross", + { public = false }) + + if is_os("windows") then + add_defines( + "NOMINMAX", + "VC_EXTRALEAN", + "WIN32_LEAN_AND_MEAN", + "_CRT_SECURE_NO_WARNINGS", + "_SCL_SECURE_NO_WARNINGS", + "_SILENCE_CLANG_CONCEPTS_MESSAGE", + "_SILENCE_CXX23_ALIGNED_STORAGE_DEPRECATION_WARNING", + { public = true }) + end + + on_config(function (target) + if target:has_tool("cxx", "msvc", "cl") then + target:add("defines", "VUK_COMPILER_MSVC=1", { force = true, public = true }) + target:add("cxflags", "/permissive- /Zc:char8_t- /wd4068", { public = false }) + elseif target:has_tool("cxx", "clang_cl", "clang-cl") then + target:add("defines", "VUK_COMPILER_CLANGCL=1", { force = true, public = true }) + target:add("cxflags", "-Wno-nullability-completeness", { public = false }) + target:add("cxflags", "/permissive- /Zc:char8_t- /wd4068", { public = false }) + elseif target:has_tool("cxx", "clang", "clangxx") then + target:add("defines", "VUK_COMPILER_CLANGPP=1", { force = true, 
public = true }) + target:add("cxflags", "-fno-char8_t -Wno-nullability-completeness -fms-extensions", { public = false }) + elseif target:has_tool("cxx", "gcc", "gxx") then + target:add("defines", "VUK_COMPILER_GPP=1", { force = true, public = true }) + target:add("cxflags", "-fno-char8_t", { public = false }) + end + end) +target_end() diff --git a/xmake/repo/packages/v/vuk/xmake.lua b/xmake/repo/packages/v/vuk/xmake.lua new file mode 100644 index 00000000..7151378b --- /dev/null +++ b/xmake/repo/packages/v/vuk/xmake.lua @@ -0,0 +1,58 @@ +package("vuk") + set_homepage("https://github.com/martty") + set_license("MIT") + + add_urls("https://github.com/martty/vuk.git") + + add_versions("2025.02.26", "257c1629aaa4200071fb752eb24894d05ce367d4") + add_versions("2025.02.26.1", "bad2fe0a1e9c355bc2e9533e40d9783cef1b6f07") + add_versions("2025.02.26.2", "9a194d2709573e4f14a2843d127db82b2cdee9ef") + add_versions("2025.03.04", "e6143a518dff34bebbb05b43056457dea3c86b92") + add_versions("2025.03.07", "add40963bb133ec42ec6a4a5d03f8c9b880a273e") + add_versions("2025.04.08", "8a768031c3da0a9429cfdc67bb179ec1c14a1501") + add_versions("2025.04.08.1", "7831d3e7030ed51a3a6466120770718404449c6c") + add_versions("2025.04.09", "d8b6e6462c01f7ce7520671ba1fda4f3152da2c1") + add_versions("2025.04.14", "b90891644b0ac8e9f77e395fc58e3c40b4091b6c") + add_versions("2025.04.14.1", "23dbfb0b21ae426d5d2e10b2445a2f81b2257bdd") + add_versions("2025.04.15", "e4c5e487b25cb98dcc0234653ca986f8504444ee") + add_versions("2025.04.19", "75771a95ca380af9323eaffd62186ce957c793ec") + add_versions("2025.04.22", "4b9918436e48b91fc89164b54fbb3cbafb6331de") + add_versions("2025.04.28", "ceded9151342919a12a61617fc1fd5dcca99e0e4") + add_versions("2025.04.29", "024df778cfcb79d21fc63236aa5427fcb3823acf") + add_versions("2025.05.06", "73fee60d5f23bf1f14a7b1f6d8b66e19d74b956b") + add_versions("2025.06.15", "ab3bf6c51e31bdb3eb51f85845b83f939d4132de") + add_versions("2025.07.09", 
"8a1b873f7d0e4bb36ecd680a608a1e057655bb8c") + add_versions("2025.09.01", "c9d2aeea71fe6cd41bd10ea47c38a390546b66ab") + + add_configs("debug_allocations", { description = "Debug VMA allocations", default = false, type = "boolean" }) + add_configs("disable_exceptions", { description = "Disalbe exceptions", default = false, type = "boolean" }) + + add_deps("spirv-cross 1.4.309+0") + add_deps("function2") + + on_load(function (package) + if package:config("disable_exceptions") then + package:add("defines", "VUK_DISABLE_EXCEPTIONS=1") + end + + if package:config("debug_allocations") then + package:add("defines", "VUK_DEBUG_ALLOCATIONS=1") + end + end) + + on_install("windows|x64", "linux|x86_64", function (package) + local configs = {} + configs.debug_allocations = package:config("debug_allocations") + configs.disable_exceptions = package:config("disable_exceptions") + os.cp(path.join(os.scriptdir(), "port", "xmake.lua"), "xmake.lua") + + import("package.tools.xmake").install(package, configs) + + os.cp("include/vuk", package:installdir("include")) + end) + + on_test(function (package) + assert(package:has_cxxincludes("function2/function2.hpp")) + end) +package_end() + From 15ac96c27b78bb70250b3bec81ff63721dc22f36 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 2 Sep 2025 15:42:24 +0300 Subject: [PATCH 25/27] add two pass occlusion for meshlets --- Lorr/Editor/Window/ViewportWindow.cc | 1 + Lorr/Editor/main.cc | 2 +- .../shaders/passes/cull_meshes.slang | 143 +++++----------- .../shaders/passes/cull_meshlets.slang | 72 +++++++-- .../shaders/passes/cull_triangles.slang | 60 +++---- .../passes/generate_cull_commands.slang | 3 +- .../shaders/passes/visbuffer_encode.slang | 2 +- .../shaders/passes/visualize_overdraw.slang | 33 ++++ Lorr/Engine/Resources/shaders/scene.slang | 1 + Lorr/Engine/Scene/GPUScene.hh | 3 +- Lorr/Engine/Scene/Scene.cc | 4 + Lorr/Engine/Scene/SceneRenderer.cc | 153 ++++++++++-------- 
Lorr/Engine/Scene/SceneRenderer.hh | 3 + 13 files changed, 260 insertions(+), 220 deletions(-) create mode 100644 Lorr/Engine/Resources/shaders/passes/visualize_overdraw.slang diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index c8518f91..f9d1e7f4 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -158,6 +158,7 @@ static auto draw_tools(ViewportWindow &self) -> void { ImGui::CheckboxFlags("Cull Triangle Back Face", &cull_flags, std::to_underlying(lr::GPU::CullFlags::TriangleBackFace)); ImGui::CheckboxFlags("Cull Micro Triangles", &cull_flags, std::to_underlying(lr::GPU::CullFlags::MicroTriangles)); ImGui::Checkbox("Debug Lines", &scene_renderer.debug_lines); + ImGui::SliderFloat("Overdraw Heatmap", &scene_renderer.overdraw_heatmap_scale, 0.0f, 100.0f); } } diff --git a/Lorr/Editor/main.cc b/Lorr/Editor/main.cc index d789abd5..6b2c9d43 100755 --- a/Lorr/Editor/main.cc +++ b/Lorr/Editor/main.cc @@ -18,7 +18,7 @@ i32 main(i32, c8 **) { }; lr::AppBuilder() // - .module(1) + .module(3) .module(window_info) .module() .module() diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index 3fd09c6c..5a3a9be0 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -4,7 +4,6 @@ import scene; import cull; import debug_drawer; -[[vk::constant_id(0)]] const u32 LATE = 0; [[vk::binding(0)]] ConstantBuffer camera; [[vk::binding(1)]] StructuredBuffer meshes; [[vk::binding(2)]] StructuredBuffer transforms; @@ -12,9 +11,8 @@ import debug_drawer; [[vk::binding(4)]] Sampler hiz_sampler; [[vk::binding(5)]] RWStructuredBuffer mesh_instances; [[vk::binding(6)]] RWStructuredBuffer meshlet_instances; -[[vk::binding(7)]] RWStructuredBuffer mesh_instance_visibility_mask; -[[vk::binding(8)]] RWStructuredBuffer visible_meshlet_instances_count; -[[vk::binding(9)]] 
RWStructuredBuffer debug_drawer; +[[vk::binding(7)]] RWStructuredBuffer visible_meshlet_instances_count; +[[vk::binding(8)]] RWStructuredBuffer debug_drawer; #ifndef CULLING_MESHES_COUNT #define CULLING_MESHES_COUNT 64 @@ -32,116 +30,51 @@ func cs_main( return; } - let mask_index = mesh_instance_index / 32; - let bit_index = mesh_instance_index - mask_index * 32; - let visibility_bit = 1 << bit_index; - let was_visible = (mesh_instance_visibility_mask[mask_index] & visibility_bit) != 0; - let mesh_instance = &mesh_instances[mesh_instance_index]; let mesh = meshes[mesh_instance.mesh_index]; let transform = transforms[mesh_instance.transform_index]; let mvp = mul(camera.projection_view_mat, transform.world); let cull_frustum = (cull_flags & CullFlags::MeshFrustum) != 0; - let cull_occlusion = (cull_flags & CullFlags::MeshOcclusion) != 0; - let lod_index = 0; - - if (LATE == 0) { - var visible = was_visible; - if (visible) { - visible = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); - } - - let should_draw = visible; - if (should_draw) { - mesh_instance.lod_index = lod_index; - let mesh_lod = mesh.lods[lod_index]; - let meshlet_count = mesh_lod.meshlet_count; - let base_meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[0], meshlet_count, MemoryOrder::Relaxed); - for (u32 i = 0; i < meshlet_count; i++) { - let offset = base_meshlet_instance_offset + i; - var meshlet_instance = MeshletInstance(); - meshlet_instance.mesh_instance_index = mesh_instance_index; - meshlet_instance.meshlet_index = i; - meshlet_instances[offset] = meshlet_instance; - } - } - } else { - var visible = true; - - var in_frustum = true; - if (visible) { - in_frustum = test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent); - } - - visible = visible && in_frustum; - - var passed_occlusion = true; - if (visible) { - if (let screen_aabb = project_aabb(mvp, camera.near_clip, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { - passed_occlusion = 
!test_occlusion(screen_aabb, hiz_image, hiz_sampler, true); - if (passed_occlusion && true) { - let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; - let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; - var debug_rect = DebugRect(); - debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); - debug_rect.extent = ndc_aabb_max - ndc_aabb_min; - debug_rect.color = f32x3(1.0, 0.0, 1.0); - debug_rect.coord = DebugDrawCoord::NDC; - debug_draw_rect(debug_drawer[0], debug_rect); - } - } - } - - let fallback = visible && !passed_occlusion && !was_visible; - visible = in_frustum && (passed_occlusion || fallback); - let should_draw = visible && !was_visible; - if (should_draw) { - mesh_instance.lod_index = lod_index; - let mesh_lod = mesh.lods[lod_index]; - let meshlet_count = mesh_lod.meshlet_count; - let base_meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[0], meshlet_count, MemoryOrder::Relaxed); - for (u32 i = 0; i < meshlet_count; i++) { - let offset = base_meshlet_instance_offset + i; - var meshlet_instance = MeshletInstance(); - meshlet_instance.mesh_instance_index = mesh_instance_index; - meshlet_instance.meshlet_index = i; - meshlet_instances[offset] = meshlet_instance; - } - } + if (cull_frustum && !test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { + return; + } - if (in_frustum && passed_occlusion) { - __atomic_or(mesh_instance_visibility_mask[mask_index], visibility_bit, MemoryOrder::Relaxed); + var lod_index = 0; +#if 1 + // Credits: + // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 + let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; + let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; + let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; + let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; + let aabb_rough_extent = 
max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); + let aabb_rough_camera_distance = max(length(aabb_center - camera.position) - 0.5 * aabb_rough_extent, 0.0); + + // Avoiding the atan here + let rough_resolution = max(camera.resolution.x, camera.resolution.y); + let fov90_distance_to_screen_ratio = 2.0f; + let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; + let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); + let rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; + + for (var i = 1; i < mesh.lod_count; i++) { + let mesh_lod = mesh.lods[i]; + let rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; + if (rough_pixel_error < camera.acceptable_lod_error) { + lod_index = i; } else { - __atomic_and(mesh_instance_visibility_mask[mask_index], ~visibility_bit, MemoryOrder::Relaxed); + break; } } - -#if 0 - // Credits: - // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 - let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; - let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; - let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; - let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; - let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); - let aabb_rough_camera_distance = max(length(aabb_center - camera.position) - 0.5 * aabb_rough_extent, 0.0); - - // Avoiding the atan here - let rough_resolution = max(camera.resolution.x, camera.resolution.y); - let fov90_distance_to_screen_ratio = 2.0f; - let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; - let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); - let rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; - - for (var i = 1; i < mesh.lod_count; i++) { - let mesh_lod = mesh.lods[i]; - let rough_pixel_error = 
rough_aabb_pixel_size * mesh_lod.error; - if (rough_pixel_error < camera.acceptable_lod_error) { - lod_index = i; - } else { - break; - } - } #endif + mesh_instance.lod_index = lod_index; + let mesh_lod = mesh.lods[lod_index]; + let meshlet_count = mesh_lod.meshlet_count; + var base_meshlet_instance_offset = __atomic_add(visible_meshlet_instances_count[2], meshlet_count, MemoryOrder::Relaxed); + for (u32 i = 0; i < meshlet_count; i++) { + let offset = base_meshlet_instance_offset + i; + meshlet_instances[offset].mesh_instance_index = mesh_instance_index; + meshlet_instances[offset].meshlet_index = i; + } } diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index b99efbd8..35c9f6cc 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -6,7 +6,7 @@ import debug_drawer; #include -[[vk::constant_id(0)]] const u32 LATE = 0; +[[vk::constant_id(0)]] const bool LATE = false; [[vk::binding(0)]] ConstantBuffer camera; [[vk::binding(1)]] StructuredBuffer meshlet_instances; [[vk::binding(2)]] StructuredBuffer mesh_instances; @@ -14,13 +14,14 @@ import debug_drawer; [[vk::binding(4)]] StructuredBuffer transforms; [[vk::binding(5)]] Image2D hiz_image; [[vk::binding(6)]] Sampler hiz_sampler; -[[vk::binding(7)]] StructuredBuffer visible_meshlet_instances_count; +[[vk::binding(7)]] RWStructuredBuffer visible_meshlet_instances_count; [[vk::binding(8)]] RWStructuredBuffer visible_meshlet_instances_indices; -[[vk::binding(9)]] RWStructuredBuffer cull_triangles_cmd; -[[vk::binding(10)]] RWStructuredBuffer debug_drawer; +[[vk::binding(9)]] RWStructuredBuffer meshlet_instance_visibility_mask; +[[vk::binding(10)]] RWStructuredBuffer cull_triangles_cmd; +[[vk::binding(11)]] RWStructuredBuffer debug_drawer; #ifndef CULLING_MESHLET_COUNT - #define CULLING_MESHLET_COUNT 64 +#define CULLING_MESHLET_COUNT 64 #endif [[shader("compute")]] 
@@ -29,34 +30,73 @@ func cs_main( uint3 thread_id : SV_DispatchThreadID, uniform CullFlags cull_flags ) -> void { - let meshlet_instance_count = visible_meshlet_instances_count[LATE]; - var meshlet_instance_index = thread_id.x; + let meshlet_instance_count = visible_meshlet_instances_count[2]; + let meshlet_instance_index = thread_id.x; if (meshlet_instance_index >= meshlet_instance_count) { return; } - if (LATE == 1) { - meshlet_instance_index += visible_meshlet_instances_count[0]; - } - let meshlet_instance = meshlet_instances[meshlet_instance_index]; let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; - let mesh = meshes[mesh_instance.mesh_index]; let transform = transforms[mesh_instance.transform_index]; + let mvp = mul(camera.projection_view_mat, transform.world); + + let mesh = meshes[mesh_instance.mesh_index]; let mesh_lod = mesh.lods[mesh_instance.lod_index]; let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; let cull_frustum = (cull_flags & CullFlags::MeshletFrustum) != 0; let cull_occlusion = (cull_flags & CullFlags::MeshletOcclusion) != 0; - var visible = true; + let meshlet_instance_visibility_index = mesh_instance.meshlet_instance_visibility_offset + meshlet_instance.meshlet_index; + let mask_index = meshlet_instance_visibility_index / 32; + let bit_index = meshlet_instance_visibility_index - mask_index * 32; + let visibility_bit = 1 << bit_index; + let was_visible = (meshlet_instance_visibility_mask[mask_index] & visibility_bit) != 0; + + var visible = LATE ? 
true : was_visible; if (visible && cull_frustum) { - let mvp = mul(camera.projection_view_mat, transform.world); visible = test_frustum(mvp, bounds.aabb_center, bounds.aabb_extent); } - if (visible) { - let index = __atomic_add(cull_triangles_cmd[0].x, 1, MemoryOrder::Relaxed); + if (LATE && visible && cull_occlusion) { + if (let screen_aabb = project_aabb(mvp, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, false); +#ifdef DEBUG_DRAW + if (visible) { + let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; + let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; + var debug_rect = DebugRect(); + debug_rect.offset = f32x3((ndc_aabb_max + ndc_aabb_min) * 0.5, screen_aabb.max.z); + debug_rect.extent = ndc_aabb_max - ndc_aabb_min; + debug_rect.color = f32x3(1.0, 0.0, 0.0); + debug_rect.coord = DebugDrawCoord::NDC; + debug_draw_rect(debug_drawer[0], debug_rect); + } +#endif + } + } + + if (visible && (!LATE || !was_visible)) { + var index = 0; + if (!LATE) { + index = __atomic_add(visible_meshlet_instances_count[0], 1, MemoryOrder::Relaxed); + } else { + let early_count = visible_meshlet_instances_count[0]; + let late_offset = __atomic_add(visible_meshlet_instances_count[1], 1, MemoryOrder::Relaxed); + index = early_count + late_offset; + } + visible_meshlet_instances_indices[index] = meshlet_instance_index; + + __atomic_add(cull_triangles_cmd[0].x, 1, MemoryOrder::Relaxed); + } + + if (LATE) { + if (visible) { + __atomic_or(meshlet_instance_visibility_mask[mask_index], visibility_bit, MemoryOrder::AcquireRelease); + } else { + __atomic_and(meshlet_instance_visibility_mask[mask_index], ~visibility_bit, MemoryOrder::AcquireRelease); + } } } diff --git a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang index 21f5409a..1efa54f9 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang +++ 
b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang @@ -6,17 +6,16 @@ import scene; import passes.visbuffer; -struct ShaderParameters { - ConstantBuffer camera; - StructuredBuffer visible_meshlet_instances_indices; - StructuredBuffer meshlet_instances; - StructuredBuffer mesh_instances; - StructuredBuffer meshes; - StructuredBuffer transforms; - - RWStructuredBuffer draw_cmd; - RWStructuredBuffer reordered_indices; -}; +[[vk::constant_id(0)]] const bool LATE = false; +[[vk::binding(0)]] ConstantBuffer camera; +[[vk::binding(1)]] StructuredBuffer visible_meshlet_instances_count; +[[vk::binding(2)]] StructuredBuffer visible_meshlet_instances_indices; +[[vk::binding(3)]] StructuredBuffer meshlet_instances; +[[vk::binding(4)]] StructuredBuffer mesh_instances; +[[vk::binding(5)]] StructuredBuffer meshes; +[[vk::binding(6)]] StructuredBuffer transforms; +[[vk::binding(7)]] RWStructuredBuffer draw_cmd; +[[vk::binding(8)]] RWStructuredBuffer reordered_indices; groupshared u32 base_index_shared; groupshared u32 triangles_passed_shared; @@ -111,23 +110,32 @@ func test_triangle(in f32x3x3 positions, in f32x2 resolution, CullFlags cull_fla func cs_main( uint3 group_id : SV_GroupID, uint3 group_thread_id : SV_GroupThreadID, - uniform ParameterBlock params, uniform CullFlags cull_flags ) -> void { + let visible_meshlet_instance_count = visible_meshlet_instances_count[u32(LATE)]; + var visible_meshlet_instance_index = group_id.x; + if (visible_meshlet_instance_index >= visible_meshlet_instance_count) { + return; + } + + if (LATE) { + visible_meshlet_instance_index += visible_meshlet_instances_count[1]; + } + let local_index = group_thread_id.x; let triangle_index = local_index * 3; - let visible_meshlet_index = group_id.x; - let meshlet_instance_index = params.visible_meshlet_instances_indices[visible_meshlet_index]; - let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; 
- let mesh = params.meshes[mesh_instance.mesh_index]; + + let meshlet_instance_index = visible_meshlet_instances_indices[visible_meshlet_instance_index]; + let meshlet_instance = meshlet_instances[meshlet_instance_index]; + let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = meshes[mesh_instance.mesh_index]; let mesh_lod = mesh.lods[mesh_instance.lod_index]; let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; if (local_index == 0) { triangles_passed_shared = 0; - let transform = params.transforms[mesh_instance.transform_index]; - model_view_proj_shared = mul(params.camera.projection_view_mat, transform.world); + let transform = transforms[mesh_instance.transform_index]; + model_view_proj_shared = mul(camera.projection_view_mat, transform.world); } GroupMemoryBarrierWithGroupSync(); @@ -137,7 +145,7 @@ func cs_main( if (local_index < meshlet.triangle_count) { let indices = meshlet.indices(mesh_lod, local_index); let positions = meshlet.positions(mesh, indices); - triangle_passed = test_triangle(positions, params.camera.resolution, cull_flags, local_index); + triangle_passed = test_triangle(positions, camera.resolution, cull_flags, local_index); triangle_passed = true; if (triangle_passed) { active_triangle_index = __atomic_add(triangles_passed_shared, 1, MemoryOrder::Relaxed); @@ -147,19 +155,15 @@ func cs_main( GroupMemoryBarrierWithGroupSync(); if (local_index == 0) { - base_index_shared = __atomic_add(params.draw_cmd[0].index_count, triangles_passed_shared * 3, MemoryOrder::Relaxed); + base_index_shared = __atomic_add(draw_cmd[0].index_count, triangles_passed_shared * 3, MemoryOrder::Relaxed); } GroupMemoryBarrierWithGroupSync(); if (triangle_passed) { - u32 struct_count, struct_stride; - params.reordered_indices.GetDimensions(struct_count, struct_stride); let index_offset = base_index_shared + active_triangle_index * 3; - if (index_offset + 2 < struct_count) { - params.reordered_indices[index_offset + 0] = 
(meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); - params.reordered_indices[index_offset + 1] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); - params.reordered_indices[index_offset + 2] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & MESHLET_PRIMITIVE_MASK); - } + reordered_indices[index_offset + 0] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); + reordered_indices[index_offset + 1] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); + reordered_indices[index_offset + 2] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & MESHLET_PRIMITIVE_MASK); } } diff --git a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang index ed3600df..de936bd7 100644 --- a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang +++ b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang @@ -1,12 +1,11 @@ import std; import gpu; -[[vk::constant_id(0)]] const u32 LATE = 0; [[vk::binding(0)]] StructuredBuffer visible_meshlet_instances_count; [[vk::binding(1)]] RWStructuredBuffer cull_meshlets_cmd; [[shader("compute")]] [[numthreads(1, 1, 1)]] func cs_main() -> void { - cull_meshlets_cmd[0].x = (visible_meshlet_instances_count[LATE] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; + cull_meshlets_cmd[0].x = (visible_meshlet_instances_count[2] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; } diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index df1b832e..29315b79 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -70,7 +70,7 @@ 
func fs_main(VertexOutput input) -> u32 { } #endif - InterlockedAdd(params.overdraw[u32x2(input.position.xy)], 1u); + __atomic_add(params.overdraw[u32x2(input.position.xy)], 1u, MemoryOrder::AcquireRelease); let vis = VisBufferData(input.meshlet_instance_index, input.triangle_index); return vis.encode(); diff --git a/Lorr/Engine/Resources/shaders/passes/visualize_overdraw.slang b/Lorr/Engine/Resources/shaders/passes/visualize_overdraw.slang new file mode 100644 index 00000000..d28fa3a4 --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/visualize_overdraw.slang @@ -0,0 +1,33 @@ +import gpu; + +#include + +struct ShaderParameters { + Image2D overdraw; +}; + +func inferno(f32 t) -> f32x3 { + let c0 = f32x3(0.0002189403691192265, 0.001651004631001012, -0.01948089843709184); + let c1 = f32x3(0.1065134194856116, 0.5639564367884091, 3.932712388889277); + let c2 = f32x3(11.60249308247187, -3.972853965665698, -15.9423941062914); + let c3 = f32x3(-41.70399613139459, 17.43639888205313, 44.35414519872813); + let c4 = f32x3(77.162935699427, -33.40235894210092, -81.80730925738993); + let c5 = f32x3(-71.31942824499214, 32.62606426397723, 73.20951985803202); + let c6 = f32x3(25.13112622477341, -12.24266895238567, -23.07032500287172); + + t = saturate(t); + return c0 + t * (c1 + t * (c2 + t * (c3 + t * (c4 + t * (c5 + t * c6))))); +} + +[[shader("fragment")]] +func fs_main( + VertexOutput input, + uniform ShaderParameters params, + uniform f32 heatmap_scale +) -> f32x4 { + let draw_scale = clamp(heatmap_scale, 0.0, 100.0) / 100.0; + let draw_count = f32(params.overdraw.Load(i32x3(i32x2(input.position.xy), 0))); + let heat = 1.0 - exp2(-draw_count * draw_scale); + let color = inferno(heat); + return f32x4(color, 1.0); +} \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 99970360..120838ed 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ 
-280,6 +280,7 @@ public struct MeshInstance { public u32 lod_index = 0; public u32 material_index = 0; public u32 transform_index = 0; + public u32 meshlet_instance_visibility_offset = 0; }; public struct MeshLOD { diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 2ae308b1..1c62ad10 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -178,6 +178,7 @@ struct MeshInstance { alignas(4) u32 lod_index = 0; alignas(4) u32 material_index = 0; alignas(4) u32 transform_index = 0; + alignas(4) u32 meshlet_instance_visibility_offset = 0; }; struct Meshlet { @@ -204,7 +205,7 @@ struct MeshLOD { }; struct Mesh { - constexpr static auto MAX_LODS = 1_sz; + constexpr static auto MAX_LODS = 8_sz; alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 097207db..7328d69f 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -664,6 +664,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< regenerate_sky |= self.last_environment.atmos_planet_radius != environment.atmos_planet_radius; self.last_environment = environment; + auto meshlet_instance_visibility_offset = 0_u32; auto max_meshlet_instance_count = 0_u32; auto gpu_meshes = std::vector(); auto gpu_mesh_instances = std::vector(); @@ -689,6 +690,9 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< mesh_instance.lod_index = lod0_index; mesh_instance.material_index = SlotMap_decode_id(primitive.material_id).index; mesh_instance.transform_index = SlotMap_decode_id(transform_id).index; + mesh_instance.meshlet_instance_visibility_offset = meshlet_instance_visibility_offset; + + meshlet_instance_visibility_offset += lod0.meshlet_count; max_meshlet_instance_count += lod0.meshlet_count; } } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 10b084ff..83f2ac6d 100644 
--- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -40,6 +40,7 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { .definitions = { #ifdef LS_DEBUG { "ENABLE_ASSERTIONS", "1" }, + { "DEBUG_DRAW", "1" }, #endif // DEBUG { "CULLING_MESH_COUNT", "64" }, { "CULLING_MESHLET_COUNT", std::to_string(Model::MAX_MESHLET_INDICES) }, @@ -201,6 +202,12 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { }; Pipeline::create(device, default_slang_session, hiz_slow_pipeline_info).value(); + auto visualize_overdraw_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.visualize_overdraw", + .entry_points = { "vs_main", "fs_main" }, + }; + Pipeline::create(device, default_slang_session, visualize_overdraw_pipeline_info).value(); + self.histogram_luminance_buffer = Buffer::create(device, sizeof(GPU::HistogramLuminance)).value(); vuk::fill(vuk::acquire_buf("histogram luminance", *device.buffer(self.histogram_luminance_buffer.id()), vuk::eNone), 0); @@ -338,10 +345,19 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in prepared_frame.mesh_instance_visibility_mask_buffer = self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eNone); prepared_frame.mesh_instance_visibility_mask_buffer = zero_fill_pass(std::move(prepared_frame.mesh_instance_visibility_mask_buffer)); + + auto meshlet_instance_visibility_mask_size_bytes = (info.max_meshlet_instance_count + 31) / 32 * sizeof(u32); + self.meshlet_instance_visibility_mask_buffer = + self.meshlet_instance_visibility_mask_buffer.resize(device, meshlet_instance_visibility_mask_size_bytes).value(); + prepared_frame.meshlet_instance_visibility_mask_buffer = + self.meshlet_instance_visibility_mask_buffer.acquire(device, "meshlet instances visibility mask", vuk::eNone); + prepared_frame.meshlet_instance_visibility_mask_buffer = zero_fill_pass(std::move(prepared_frame.meshlet_instance_visibility_mask_buffer)); } 
else if (self.mesh_instances_buffer) { prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eMemoryRead); prepared_frame.mesh_instance_visibility_mask_buffer = self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eMemoryRead); + prepared_frame.meshlet_instance_visibility_mask_buffer = + self.meshlet_instance_visibility_mask_buffer.acquire(device, "meshlet instances visibility mask", vuk::eMemoryRead); } info.environment.transmittance_lut_size = self.sky_transmittance_lut_view.extent(); @@ -413,13 +429,11 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in } static auto cull_meshes( - bool late, GPU::CullFlags cull_flags, u32 mesh_instance_count, TransferManager &transfer_man, vuk::Value &meshes_buffer, vuk::Value &mesh_instances_buffer, - vuk::Value &mesh_instance_visibility_mask_buffer, vuk::Value &meshlet_instances_buffer, vuk::Value &visible_meshlet_instances_count_buffer, vuk::Value &transforms_buffer, @@ -428,11 +442,10 @@ static auto cull_meshes( vuk::Value &debug_drawer_buffer ) -> vuk::Value { ZoneScoped; - memory::ScopedStack stack; auto vis_cull_meshes_pass = vuk::make_pass( - stack.format("vis cull meshes {}", late ? 
"late" : "early"), - [late, cull_flags, mesh_instance_count]( + "vis cull meshes", + [cull_flags, mesh_instance_count]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eComputeRead) camera, VUK_BA(vuk::eComputeRead) meshes, @@ -440,7 +453,6 @@ static auto cull_meshes( VUK_IA(vuk::eComputeSampled) hiz, VUK_BA(vuk::eComputeRW) mesh_instances, VUK_BA(vuk::eComputeRW) meshlet_instances, - VUK_BA(vuk::eComputeRW) mesh_instance_visibility_mask, VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, VUK_BA(vuk::eComputeRW) debug_drawer ) { @@ -453,24 +465,12 @@ static auto cull_meshes( .bind_sampler(0, 4, hiz_sampler_info) .bind_buffer(0, 5, mesh_instances) .bind_buffer(0, 6, meshlet_instances) - .bind_buffer(0, 7, mesh_instance_visibility_mask) - .bind_buffer(0, 8, visible_meshlet_instances_count) - .bind_buffer(0, 9, debug_drawer) + .bind_buffer(0, 7, visible_meshlet_instances_count) + .bind_buffer(0, 8, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) - .specialize_constants(0, late) .dispatch_invocations(mesh_instance_count); - return std::make_tuple( - camera, - meshes, - transforms, - hiz, - mesh_instances, - meshlet_instances, - mesh_instance_visibility_mask, - visible_meshlet_instances_count, - debug_drawer - ); + return std::make_tuple(camera, meshes, transforms, hiz, mesh_instances, meshlet_instances, visible_meshlet_instances_count, debug_drawer); } ); @@ -481,7 +481,6 @@ static auto cull_meshes( hiz_attachment, mesh_instances_buffer, meshlet_instances_buffer, - mesh_instance_visibility_mask_buffer, visible_meshlet_instances_count_buffer, debug_drawer_buffer ) = @@ -492,23 +491,19 @@ static auto cull_meshes( std::move(hiz_attachment), std::move(mesh_instances_buffer), std::move(meshlet_instances_buffer), - std::move(mesh_instance_visibility_mask_buffer), std::move(visible_meshlet_instances_count_buffer), std::move(debug_drawer_buffer) ); auto generate_cull_commands_pass = vuk::make_pass( - 
stack.format("generate cull commands {}", late ? "late" : "early"), - [late]( - vuk::CommandBuffer &cmd_list, // - VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, - VUK_BA(vuk::eComputeRW) cull_meshlets_cmd - ) { + "generate cull commands", + [](vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) cull_meshlets_cmd) { cmd_list // .bind_compute_pipeline("passes.generate_cull_commands") .bind_buffer(0, 0, visible_meshlet_instances_count) .bind_buffer(0, 1, cull_meshlets_cmd) - .specialize_constants(0, late) .dispatch(1); return std::make_tuple(visible_meshlet_instances_count, cull_meshlets_cmd); @@ -530,6 +525,7 @@ static auto cull_meshlets( vuk::Value &cull_meshlets_cmd_buffer, vuk::Value &visible_meshlet_instances_count_buffer, vuk::Value &visible_meshlet_instances_indices_buffer, + vuk::Value &meshlet_instance_visibility_mask_buffer, vuk::Value &reordered_indices_buffer, vuk::Value &meshes_buffer, vuk::Value &mesh_instances_buffer, @@ -553,13 +549,12 @@ static auto cull_meshlets( VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, VUK_IA(vuk::eComputeSampled) hiz, - VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, - VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, + VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) visible_meshlet_instances_indices, + VUK_BA(vuk::eComputeRW) meshlet_instance_visibility_mask, VUK_BA(vuk::eComputeRW) cull_triangles_cmd, VUK_BA(vuk::eComputeRW) debug_drawer ) { - // cmd_list.image_barrier(hiz, vuk::eComputeRW, vuk::eComputeSampled, 0, hiz->level_count); - cmd_list // .bind_compute_pipeline("passes.cull_meshlets") .bind_buffer(0, 0, camera) @@ -571,8 +566,9 @@ static auto cull_meshlets( .bind_sampler(0, 6, hiz_sampler_info) .bind_buffer(0, 7, visible_meshlet_instances_count) .bind_buffer(0, 8, visible_meshlet_instances_indices) - .bind_buffer(0, 9, cull_triangles_cmd) - 
.bind_buffer(0, 10, debug_drawer) + .bind_buffer(0, 9, meshlet_instance_visibility_mask) + .bind_buffer(0, 10, cull_triangles_cmd) + .bind_buffer(0, 11, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) .specialize_constants(0, late) .dispatch_indirect(dispatch_cmd); @@ -587,6 +583,7 @@ static auto cull_meshlets( hiz, visible_meshlet_instances_count, visible_meshlet_instances_indices, + meshlet_instance_visibility_mask, cull_triangles_cmd, debug_drawer ); @@ -605,6 +602,7 @@ static auto cull_meshlets( hiz_attachment, visible_meshlet_instances_count_buffer, visible_meshlet_instances_indices_buffer, + meshlet_instance_visibility_mask_buffer, cull_triangles_cmd_buffer, debug_drawer_buffer ) = @@ -618,6 +616,7 @@ static auto cull_meshlets( std::move(hiz_attachment), std::move(visible_meshlet_instances_count_buffer), std::move(visible_meshlet_instances_indices_buffer), + std::move(meshlet_instance_visibility_mask_buffer), std::move(cull_triangles_cmd_buffer), std::move(debug_drawer_buffer) ); @@ -625,10 +624,11 @@ static auto cull_meshlets( // ── CULL TRIANGLES ────────────────────────────────────────────────── auto vis_cull_triangles_pass = vuk::make_pass( stack.format("vis cull triangles {}", late ? 
"late" : "early"), - [cull_flags]( + [late, cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, VUK_BA(vuk::eComputeRead) camera, + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, VUK_BA(vuk::eComputeRead) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, VUK_BA(vuk::eComputeRead) mesh_instances, @@ -640,18 +640,21 @@ static auto cull_meshlets( cmd_list // .bind_compute_pipeline("passes.cull_triangles") .bind_buffer(0, 0, camera) - .bind_buffer(0, 1, visible_meshlet_instances_indices) - .bind_buffer(0, 2, meshlet_instances) - .bind_buffer(0, 3, mesh_instances) - .bind_buffer(0, 4, meshes) - .bind_buffer(0, 5, transforms) - .bind_buffer(0, 6, draw_indexed_cmd) - .bind_buffer(0, 7, reordered_indices) + .bind_buffer(0, 1, visible_meshlet_instances_count) + .bind_buffer(0, 2, visible_meshlet_instances_indices) + .bind_buffer(0, 3, meshlet_instances) + .bind_buffer(0, 4, mesh_instances) + .bind_buffer(0, 5, meshes) + .bind_buffer(0, 6, transforms) + .bind_buffer(0, 7, draw_indexed_cmd) + .bind_buffer(0, 8, reordered_indices) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) + .specialize_constants(0, late) .dispatch_indirect(cull_triangles_cmd); return std::make_tuple( camera, + visible_meshlet_instances_count, visible_meshlet_instances_indices, meshlet_instances, mesh_instances, @@ -667,6 +670,7 @@ static auto cull_meshlets( std::tie( camera_buffer, + visible_meshlet_instances_count_buffer, visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, mesh_instances_buffer, @@ -678,6 +682,7 @@ static auto cull_meshlets( vis_cull_triangles_pass( std::move(cull_triangles_cmd_buffer), std::move(camera_buffer), + std::move(visible_meshlet_instances_count_buffer), std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), std::move(mesh_instances_buffer), @@ -1186,6 +1191,7 @@ auto SceneRenderer::render(this SceneRenderer &self, 
vuk::Value({}); - auto early_cull_meshlets_cmd_buffer = cull_meshes( - false, + auto visible_meshlet_instances_count_buffer = transfer_man.scratch_buffer({}); + auto cull_meshlets_cmd_buffer = cull_meshes( info.cull_flags, frame.mesh_instance_count, transfer_man, meshes_buffer, mesh_instances_buffer, - mesh_instance_visibility_mask_buffer, meshlet_instances_buffer, visible_meshlet_instances_count_buffer, transforms_buffer, @@ -1218,9 +1222,10 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value 0.0f) { + auto visualize_overdraw_pass = vuk::make_pass( + "visualize overdraw", + [scale = self.overdraw_heatmap_scale]( + vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eColorRW) dst, + VUK_IA(vuk::eFragmentSampled) overdraw + ) { + cmd_list // + .bind_graphics_pipeline("passes.visualize_overdraw") + .set_rasterization({}) + .set_color_blend(dst, vuk::BlendPreset::eOff) + .set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor) + .set_viewport(0, vuk::Rect2D::framebuffer()) + .set_scissor(0, vuk::Rect2D::framebuffer()) + .bind_image(0, 0, overdraw) + .push_constants(vuk::ShaderStageFlagBits::eFragment, 0, scale) + .draw(3, 1, 0, 0); + + return dst; + } + ); + + return visualize_overdraw_pass(std::move(dst_attachment), std::move(overdraw_attachment)); + } + // ── VISBUFFER DECODE ──────────────────────────────────────────────── auto vis_decode_pass = vuk::make_pass( "vis decode", @@ -1709,6 +1725,11 @@ auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { self.mesh_instance_visibility_mask_buffer = {}; } + if (self.meshlet_instance_visibility_mask_buffer) { + device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); + self.meshlet_instance_visibility_mask_buffer = {}; + } + if (self.materials_buffer) { device.destroy(self.materials_buffer.id()); self.materials_buffer = {}; diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index e3f25a30..4adbfd75 100644 --- 
a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -31,6 +31,7 @@ struct PreparedFrame { vuk::Value meshes_buffer = {}; vuk::Value mesh_instances_buffer = {}; vuk::Value mesh_instance_visibility_mask_buffer = {}; + vuk::Value meshlet_instance_visibility_mask_buffer = {}; vuk::Value materials_buffer = {}; vuk::Value environment_buffer = {}; vuk::Value camera_buffer = {}; @@ -56,6 +57,7 @@ struct SceneRenderer { Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; Buffer mesh_instance_visibility_mask_buffer = {}; + Buffer meshlet_instance_visibility_mask_buffer = {}; Buffer materials_buffer = {}; @@ -70,6 +72,7 @@ struct SceneRenderer { ImageView hiz_view = {}; bool debug_lines = false; + f32 overdraw_heatmap_scale = 0.0f; auto init(this SceneRenderer &) -> bool; auto destroy(this SceneRenderer &) -> void; From 207e51105255a7b2785ebcfc397233e80548d802 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 2 Sep 2025 16:34:20 +0300 Subject: [PATCH 26/27] fix cull triangles OOB --- Lorr/Engine/Resources/shaders/passes/cull_triangles.slang | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang index 1efa54f9..8c6da6aa 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang @@ -112,14 +112,9 @@ func cs_main( uint3 group_thread_id : SV_GroupThreadID, uniform CullFlags cull_flags ) -> void { - let visible_meshlet_instance_count = visible_meshlet_instances_count[u32(LATE)]; var visible_meshlet_instance_index = group_id.x; - if (visible_meshlet_instance_index >= visible_meshlet_instance_count) { - return; - } - if (LATE) { - visible_meshlet_instance_index += visible_meshlet_instances_count[1]; + visible_meshlet_instance_index += visible_meshlet_instances_count[0]; } let local_index = 
group_thread_id.x; From 3d1f41b40299b339081af71832202d33af21120c Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 2 Sep 2025 18:22:36 +0300 Subject: [PATCH 27/27] more conservative hiz --- Lorr/Editor/main.cc | 2 +- Lorr/Engine/Resources/shaders/cull.slang | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/Lorr/Editor/main.cc b/Lorr/Editor/main.cc index 6b2c9d43..d789abd5 100755 --- a/Lorr/Editor/main.cc +++ b/Lorr/Editor/main.cc @@ -18,7 +18,7 @@ i32 main(i32, c8 **) { }; lr::AppBuilder() // - .module(3) + .module(1) .module(window_info) .module() .module() diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index bdcf6b1e..0badc40a 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -97,19 +97,14 @@ public func test_occlusion( let min_uv = screen_aabb.min.xy; let max_uv = screen_aabb.max.xy; - let min_texel = u32x2(clamp(min_uv * f32x2(hiz_size), 0.0, hiz_size - 1.0)); - let max_texel = u32x2(clamp(max_uv * f32x2(hiz_size), 0.0, hiz_size - 1.0)); + let min_texel = u32x2(max(min_uv * f32x2(hiz_size), 0.0)); + let max_texel = u32x2(min(max_uv * f32x2(hiz_size), hiz_size - 1.0)); - let size = max_texel - min_texel + 1; + let size = max_texel - min_texel; let max_size = max(size.x, size.y); - var mip = 0.0; - if (ceiling) { - mip = max(0.0, ceil(log2(max_size))); - } else { - mip = floor(log2(max_size)); - } + let mip = max(floor(log2(max_size)) - 1.0, 0.0); var uv = (min_uv + max_uv) * 0.5; let d = hiz_image.SampleLevel(hiz_sampler, uv, mip); - return screen_aabb.max.z <= d; + return screen_aabb.max.z < d; }