From cc04dc2ae24d03fa79a3e160b07efd9a89c2a94f Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:07:29 +0300 Subject: [PATCH 01/16] mesh optimization --- Lorr/Engine/Asset/Asset.cc | 229 +++++++++--------- Lorr/Engine/Asset/Model.hh | 2 + Lorr/Engine/Graphics/Slang/Compiler.cc | 5 +- Lorr/Engine/Graphics/Vulkan/Device.cc | 45 ++-- .../shaders/passes/cull_triangles.slang | 15 +- .../shaders/passes/visbuffer_decode.slang | 12 +- .../shaders/passes/visbuffer_encode.slang | 9 +- Lorr/Engine/Resources/shaders/scene.slang | 66 +++-- Lorr/Engine/Resources/shaders/std/color.slang | 2 +- Lorr/Engine/Scene/GPUScene.hh | 8 +- Lorr/Engine/Scene/Scene.cc | 1 + Lorr/Engine/Scene/SceneRenderer.cc | 17 +- xmake/packages.lua | 4 +- 13 files changed, 211 insertions(+), 204 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 380655bb..b894c4e0 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -744,133 +744,145 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { } // ── MESH PROCESSING ───────────────────────────────────────────────── - std::vector model_vertex_positions = {}; - std::vector model_indices = {}; - - std::vector model_meshlets = {}; - std::vector model_meshlet_bounds = {}; - std::vector model_local_triangle_indices = {}; - - for (const auto &mesh : model->meshes) { - for (auto primitive_index : mesh.primitive_indices) { - ZoneScopedN("GPU Meshlet Generation"); + auto model_indices = std::move(gltf_callbacks.indices); + auto model_vertices = std::move(gltf_callbacks.vertex_positions); + auto model_normals = std::move(gltf_callbacks.vertex_normals); + auto model_texcoords = std::move(gltf_callbacks.vertex_texcoords); + + auto processed_indices = std::vector(); + auto processed_vertices = std::vector(); + auto processed_normals = std::vector(); + auto processed_texcoords = std::vector(); + auto processed_meshlets = std::vector(); + auto 
processed_meshlet_bounds = std::vector(); + auto processed_indirect_vertex_indices = std::vector(); + auto processed_local_triangle_indices = std::vector(); + + for (const auto &gltf_mesh : model->meshes) { + for (auto primitive_index : gltf_mesh.primitive_indices) { + ZoneNamedN(z, "GPU Meshlet Generation", true); auto &primitive = model->primitives[primitive_index]; - auto vertex_offset = model_vertex_positions.size(); - auto index_offset = model_indices.size(); - auto triangle_offset = model_local_triangle_indices.size(); - auto meshlet_offset = model_meshlets.size(); - - auto raw_indices = ls::span(gltf_callbacks.indices.data() + primitive.index_offset, primitive.index_count); - auto raw_vertex_positions = ls::span(gltf_callbacks.vertex_positions.data() + primitive.vertex_offset, primitive.vertex_count); - auto raw_vertex_normals = ls::span(gltf_callbacks.vertex_normals.data() + primitive.vertex_offset, primitive.vertex_count); - - auto meshlets = std::vector(); - auto meshlet_bounds_infos = std::vector(); - auto meshlet_indices = std::vector(); - auto local_triangle_indices = std::vector(); + auto meshlet_offset = processed_meshlets.size(); + + auto indices = std::vector(); + auto vertices = std::vector(); + auto normals = std::vector(); + auto texcoords = std::vector(); { - ZoneScopedN("Build Meshlets"); - // Worst case count - auto max_meshlets = meshopt_buildMeshletsBound( // - raw_indices.size(), - Model::MAX_MESHLET_INDICES, - Model::MAX_MESHLET_PRIMITIVES - ); - auto raw_meshlets = std::vector(max_meshlets); - meshlet_indices.resize(max_meshlets * Model::MAX_MESHLET_INDICES); - local_triangle_indices.resize(max_meshlets * Model::MAX_MESHLET_PRIMITIVES * 3); - auto meshlet_count = meshopt_buildMeshlets( // - raw_meshlets.data(), - meshlet_indices.data(), - local_triangle_indices.data(), - raw_indices.data(), - raw_indices.size(), - reinterpret_cast(raw_vertex_positions.data()), - raw_vertex_positions.size(), - sizeof(glm::vec3), - 
Model::MAX_MESHLET_INDICES, - Model::MAX_MESHLET_PRIMITIVES, - 0.0 - ); + ZoneNamedN(z2, "Remap geometry", true); + + auto raw_indices = std::span(model_indices.data() + primitive.index_offset, primitive.index_count); + auto raw_vertices = std::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); + auto raw_normals = std::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); + auto raw_texcoords = std::span(model_texcoords.data() + primitive.vertex_offset, primitive.vertex_count); + + // clang-format off + auto remapped_vertices = std::vector(raw_vertices.size()); + auto vertex_count = meshopt_optimizeVertexFetchRemap(remapped_vertices.data(), raw_indices.data(), raw_indices.size(), primitive.vertex_count); + + vertices.resize(vertex_count); + meshopt_remapVertexBuffer(vertices.data(), raw_vertices.data(), raw_vertices.size(), sizeof(glm::vec3), remapped_vertices.data()); + + normals.resize(vertex_count); + meshopt_remapVertexBuffer(normals.data(), raw_normals.data(), raw_normals.size(), sizeof(glm::vec3), remapped_vertices.data()); + + texcoords.resize(vertex_count); + meshopt_remapVertexBuffer(texcoords.data(), raw_texcoords.data(), raw_texcoords.size(), sizeof(glm::vec2), remapped_vertices.data()); + + indices.resize(raw_indices.size()); + meshopt_remapIndexBuffer(indices.data(), raw_indices.data(), primitive.index_count, remapped_vertices.data()); + // clang-format on + + { + auto optimized_indices = std::vector(raw_indices.size()); + meshopt_optimizeVertexCache(optimized_indices.data(), indices.data(), indices.size(), vertex_count); + indices = std::move(optimized_indices); + } + } - // Trim meshlets from worst case to current case - raw_meshlets.resize(meshlet_count); - meshlets.resize(meshlet_count); - meshlet_bounds_infos.resize(meshlet_count); - const auto &last_meshlet = raw_meshlets[meshlet_count - 1]; - meshlet_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); - 
local_triangle_indices.resize(last_meshlet.triangle_offset + ((last_meshlet.triangle_count * 3 + 3) & ~3_u32)); - - for (const auto &[raw_meshlet, meshlet, meshlet_bounds] : std::views::zip(raw_meshlets, meshlets, meshlet_bounds_infos)) { - // AABB Computing - auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); - auto meshlet_bb_max = glm::vec3(std::numeric_limits::lowest()); - for (u32 i = 0; i < raw_meshlet.triangle_count * 3; i++) { - const auto &tri_pos = raw_vertex_positions - [meshlet_indices[raw_meshlet.vertex_offset + local_triangle_indices[raw_meshlet.triangle_offset + i]]]; - meshlet_bb_min = glm::min(meshlet_bb_min, tri_pos); - meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); - } - - // SB and Cone Computing - // auto sphere_bounds = meshopt_computeMeshletBounds( // - // &meshlet_indices[raw_meshlet.vertex_offset], - // &local_triangle_indices[raw_meshlet.triangle_offset], - // raw_meshlet.triangle_count, - // reinterpret_cast(raw_vertex_positions.data()), - // raw_vertex_positions.size(), - // sizeof(glm::vec3) - // ); - - meshlet.vertex_offset = vertex_offset; - meshlet.index_offset = index_offset + raw_meshlet.vertex_offset; - meshlet.triangle_offset = triangle_offset + raw_meshlet.triangle_offset; - meshlet.triangle_count = raw_meshlet.triangle_count; - meshlet_bounds.aabb_min = meshlet_bb_min; - meshlet_bounds.aabb_max = meshlet_bb_max; - // meshlet_bounds.sphere_center.x = sphere_bounds.center[0]; - // meshlet_bounds.sphere_center.y = sphere_bounds.center[1]; - // meshlet_bounds.sphere_center.z = sphere_bounds.center[2]; - // meshlet_bounds.sphere_radius = sphere_bounds.radius; + // Worst case count + auto max_meshlet_count = meshopt_buildMeshletsBound(indices.size(), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES); + auto meshlets = std::vector(max_meshlet_count); + auto indirect_vertex_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_INDICES); + auto local_triangle_indices = std::vector(max_meshlet_count * 
Model::MAX_MESHLET_PRIMITIVES * 3); + + auto meshlet_count = meshopt_buildMeshlets( + meshlets.data(), + indirect_vertex_indices.data(), + local_triangle_indices.data(), + indices.data(), + indices.size(), + reinterpret_cast(vertices.data()), + vertices.size(), + sizeof(glm::vec3), + Model::MAX_MESHLET_INDICES, + Model::MAX_MESHLET_PRIMITIVES, + 0.0 + ); + + // Trim meshlets from worst case to current case + meshlets.resize(meshlet_count); + const auto &last_meshlet = meshlets[meshlet_count - 1]; + indirect_vertex_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); + local_triangle_indices.resize(last_meshlet.triangle_offset + ((last_meshlet.triangle_count * 3 + 3) & ~3_u32)); + + auto meshlet_bounds = std::vector(meshlet_count); + for (const auto &[meshlet, meshlet_aabb] : std::views::zip(meshlets, meshlet_bounds)) { + // AABB computation + auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); + auto meshlet_bb_max = glm::vec3(std::numeric_limits::lowest()); + for (u32 i = 0; i < meshlet.triangle_count * 3; i++) { + const auto &tri_pos = + vertices[indirect_vertex_indices[meshlet.vertex_offset + local_triangle_indices[meshlet.triangle_offset + i]]]; + meshlet_bb_min = glm::min(meshlet_bb_min, tri_pos); + meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); } - primitive.meshlet_count = meshlet_count; - primitive.meshlet_offset = meshlet_offset; - primitive.local_triangle_indices_offset = triangle_offset; + meshlet_aabb.aabb_min = meshlet_bb_min; + meshlet_aabb.aabb_max = meshlet_bb_max; } - std::ranges::move(raw_vertex_positions, std::back_inserter(model_vertex_positions)); - std::ranges::move(meshlet_indices, std::back_inserter(model_indices)); - std::ranges::move(meshlets, std::back_inserter(model_meshlets)); - std::ranges::move(meshlet_bounds_infos, std::back_inserter(model_meshlet_bounds)); - std::ranges::move(local_triangle_indices, std::back_inserter(model_local_triangle_indices)); + primitive.meshlet_count = 
static_cast(meshlet_count); + primitive.meshlet_offset = static_cast(meshlet_offset); + + std::ranges::move(indices, std::back_inserter(processed_indices)); + std::ranges::move(vertices, std::back_inserter(processed_vertices)); + std::ranges::move(normals, std::back_inserter(processed_normals)); + std::ranges::move(texcoords, std::back_inserter(processed_texcoords)); + std::ranges::move(meshlets, std::back_inserter(processed_meshlets)); + std::ranges::move(meshlet_bounds, std::back_inserter(processed_meshlet_bounds)); + std::ranges::move(local_triangle_indices, std::back_inserter(processed_local_triangle_indices)); + std::ranges::move(indirect_vertex_indices, std::back_inserter(processed_indirect_vertex_indices)); } } auto &transfer_man = impl->device->transfer_man(); - model->indices = Buffer::create(*impl->device, ls::size_bytes(model_indices)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_indices), model->indices)); + model->indices = Buffer::create(*impl->device, ls::size_bytes(processed_indices)).value(); + transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_indices), model->indices)); - model->vertex_positions = Buffer::create(*impl->device, ls::size_bytes(model_vertex_positions)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_vertex_positions), model->vertex_positions)); + model->vertex_positions = Buffer::create(*impl->device, ls::size_bytes(processed_vertices)).value(); + transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_vertices), model->vertex_positions)); - model->vertex_normals = Buffer::create(*impl->device, ls::size_bytes(gltf_callbacks.vertex_normals)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(gltf_callbacks.vertex_normals), model->vertex_normals)); + model->vertex_normals = Buffer::create(*impl->device, ls::size_bytes(processed_normals)).value(); + transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_normals), 
model->vertex_normals)); - if (!gltf_callbacks.vertex_texcoords.empty()) { - model->texture_coords = Buffer::create(*impl->device, ls::size_bytes(gltf_callbacks.vertex_texcoords)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(gltf_callbacks.vertex_texcoords), model->texture_coords)); + if (!processed_texcoords.empty()) { + model->texture_coords = Buffer::create(*impl->device, ls::size_bytes(processed_texcoords)).value(); + transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_texcoords), model->texture_coords)); } - model->meshlets = Buffer::create(*impl->device, ls::size_bytes(model_meshlets)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_meshlets), model->meshlets)); + model->meshlets = Buffer::create(*impl->device, ls::size_bytes(processed_meshlets)).value(); + transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_meshlets), model->meshlets)); + + model->meshlet_bounds = Buffer::create(*impl->device, ls::size_bytes(processed_meshlet_bounds)).value(); + transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_meshlet_bounds), model->meshlet_bounds)); - model->meshlet_bounds = Buffer::create(*impl->device, ls::size_bytes(model_meshlet_bounds)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_meshlet_bounds), model->meshlet_bounds)); + model->local_triangle_indices = Buffer::create(*impl->device, ls::size_bytes(processed_local_triangle_indices)).value(); + transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_local_triangle_indices), model->local_triangle_indices)); - model->local_triangle_indices = Buffer::create(*impl->device, ls::size_bytes(model_local_triangle_indices)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_local_triangle_indices), model->local_triangle_indices)); + model->indirect_vertex_indices = Buffer::create(*impl->device, ls::size_bytes(processed_indirect_vertex_indices)).value(); + 
transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_indirect_vertex_indices), model->indirect_vertex_indices)); return true; } @@ -898,6 +910,7 @@ auto AssetManager::unload_model(const UUID &uuid) -> bool { impl->device->destroy(model->meshlets.id()); impl->device->destroy(model->meshlet_bounds.id()); impl->device->destroy(model->local_triangle_indices.id()); + impl->device->destroy(model->indirect_vertex_indices.id()); impl->models.destroy_slot(asset->model_id); asset->model_id = ModelID::Invalid; diff --git a/Lorr/Engine/Asset/Model.hh b/Lorr/Engine/Asset/Model.hh index 6a060978..aceb8ba5 100644 --- a/Lorr/Engine/Asset/Model.hh +++ b/Lorr/Engine/Asset/Model.hh @@ -103,6 +103,7 @@ struct Model { usize default_scene_index = 0; + // TODO: Make this one buffer Buffer indices = {}; Buffer vertex_positions = {}; Buffer vertex_normals = {}; @@ -110,5 +111,6 @@ struct Model { Buffer meshlets = {}; Buffer meshlet_bounds = {}; Buffer local_triangle_indices = {}; + Buffer indirect_vertex_indices = {}; }; } // namespace lr diff --git a/Lorr/Engine/Graphics/Slang/Compiler.cc b/Lorr/Engine/Graphics/Slang/Compiler.cc index 6bc4cf43..4415552e 100644 --- a/Lorr/Engine/Graphics/Slang/Compiler.cc +++ b/Lorr/Engine/Graphics/Slang/Compiler.cc @@ -332,7 +332,8 @@ auto SlangCompiler::new_session(const SlangSessionInfo &info) -> ls::option ls::option std::expected std::expected std::expected std::expected()); self.runtime.emplace( diff --git a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang index 254d065d..318ad7c4 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang @@ -72,13 +72,12 @@ func CullSmallPrimitive(f32x2x3 vertices, f32x2 viewportExtent) -> bool { } func test_triangle(in Mesh mesh, in Meshlet meshlet, in f32x2 resolution, CullFlags cull_flags, u32 triangle_index) -> bool { - const Triangle tri = 
meshlet.indices(mesh, triangle_index); - const u32x3 vertices = meshlet.vertices(mesh, tri); - const f32x3x3 positions = meshlet.positions(mesh, vertices); + let indices = meshlet.indices(mesh, triangle_index); + let positions = meshlet.positions(mesh, indices); - const f32x4 clip_pos_0 = mul(model_view_proj_shared, f32x4(positions[0], 1.0)); - const f32x4 clip_pos_1 = mul(model_view_proj_shared, f32x4(positions[1], 1.0)); - const f32x4 clip_pos_2 = mul(model_view_proj_shared, f32x4(positions[2], 1.0)); + let clip_pos_0 = mul(model_view_proj_shared, f32x4(positions[0], 1.0)); + let clip_pos_1 = mul(model_view_proj_shared, f32x4(positions[1], 1.0)); + let clip_pos_2 = mul(model_view_proj_shared, f32x4(positions[2], 1.0)); // Cull all triangles behind camera if (clip_pos_0.z < 0.0 && clip_pos_1.z < 0.0 && clip_pos_2.z < 0.0) { @@ -127,7 +126,7 @@ func cs_main( if (local_index == 0) { triangles_passed_shared = 0; - const Transform transform = params.transforms[meshlet_instance.transform_index]; + let transform = params.transforms[meshlet_instance.transform_index]; model_view_proj_shared = mul(params.camera->projection_view_mat, transform.world); } @@ -151,7 +150,7 @@ func cs_main( std::control_barrier(std::memory_order_acq_rel); if (triangle_passed) { - const u32 index_offset = base_index_shared + active_triangle_index * 3; + let index_offset = base_index_shared + active_triangle_index * 3; params.reordered_indices[index_offset + 0] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); params.reordered_indices[index_offset + 1] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); params.reordered_indices[index_offset + 2] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & MESHLET_PRIMITIVE_MASK); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index 
0bd391c1..662ca62e 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -10,7 +10,6 @@ import passes.visbuffer; struct ShaderParameters { Image2D visbuffer; ConstantBuffer camera; - RWStructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; StructuredBuffer meshes; StructuredBuffer transforms; @@ -96,18 +95,17 @@ func fs_main(VertexOutput input) -> FragmentOutput { FragmentOutput output = {}; let vis = VisBufferData(texel); - let meshlet_instance_index = params.visible_meshlet_instances_indices[vis.meshlet_instance_index]; + let meshlet_instance_index = vis.meshlet_instance_index; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; let mesh = params.meshes[meshlet_instance.mesh_index]; let transform = params.transforms[meshlet_instance.transform_index]; let material = params.materials[meshlet_instance.material_index]; - let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; + let indices = meshlet.indices(mesh, vis.triangle_index); - let vertices = meshlet.vertices(mesh, indices); - let positions = meshlet.positions(mesh, vertices); - let normals = meshlet.normals(mesh, vertices); - let tex_coords = meshlet.tex_coords(mesh, vertices); + let positions = meshlet.positions(mesh, indices); + let normals = meshlet.normals(mesh, indices); + let tex_coords = meshlet.tex_coords(mesh, indices); let world_positions = transform.to_world_positions(positions); let NDC = f32x3(input.tex_coord * 2.0 - 1.0, 1.0); let deriv = compute_partial_derivatives(world_positions, NDC.xy, params.camera.resolution); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index 8748131d..a00dfa83 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -7,7 +7,7 @@ import passes.visbuffer; 
struct ShaderParameters { ConstantBuffer camera; - RWStructuredBuffer visible_meshlet_instances_indices; + StructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; StructuredBuffer meshes; StructuredBuffer transforms; @@ -35,9 +35,8 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; let index = meshlet.index(mesh, vis.triangle_index); - let vertex = meshlet.vertex(mesh, index); - let vertex_pos = meshlet.position(mesh, vertex); - let tex_coord = meshlet.tex_coord(mesh, vertex); + let vertex_pos = meshlet.position(mesh, index); + let tex_coord = meshlet.tex_coord(mesh, index); let world_pos = transform.to_world_position(vertex_pos); let clip_pos = mul(params.camera.projection_view_mat, f32x4(world_pos.xyz, 1.0)); @@ -45,7 +44,7 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { output.position = clip_pos; output.vertex_pos = vertex_pos; output.tex_coord = tex_coord; - output.meshlet_instance_index = vis.meshlet_instance_index; + output.meshlet_instance_index = meshlet_instance_index; output.triangle_index = vis.triangle_index / 3; output.material_index = meshlet_instance.material_index; diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 1ca63fa5..6a38b0b0 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -141,7 +141,7 @@ public struct Material { public func sample_albedo_color(in UVGradient grad) -> f32x4 { if (this.flags & MaterialFlag::HasAlbedoImage) { - const let color = material_images[this.albedo_image_index] + let color = material_images[this.albedo_image_index] .sample_grad(material_samplers[this.albedo_image_index], grad.uv, grad.ddx, grad.ddy); return this.albedo_color * color; } @@ -160,7 +160,7 @@ public struct Material { public func sample_emissive_color(in UVGradient grad) -> f32x3 { if (this.flags & 
MaterialFlag::HasEmissiveImage) { - const let color = material_images[this.emissive_image_index] + let color = material_images[this.emissive_image_index] .sample_grad(material_samplers[this.emissive_image_index], grad.uv, grad.ddx, grad.ddy).rgb; return this.emissive_color * color; } @@ -169,9 +169,9 @@ public struct Material { } public func sample_metallic_roughness(in UVGradient grad) -> f32x2 { - const let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); + let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); if (this.flags & MaterialFlag::HasMetallicRoughnessImage) { - const let color = material_images[this.metallic_rougness_image_index] + let color = material_images[this.metallic_rougness_image_index] .sample_grad(material_samplers[this.metallic_rougness_image_index], grad.uv, grad.ddx, grad.ddy).bg; return metallic_roughness * color; } @@ -189,72 +189,65 @@ public struct Material { } }; -public typealias Triangle = u32x3; public struct Meshlet { public u32 vertex_offset = 0; - public u32 index_offset = 0; public u32 triangle_offset = 0; + public u32 vertex_count = 0; public u32 triangle_count = 0; // Takes a local triange index and returns an index to index buffer. public func index(in Mesh mesh, u32 i) -> u32 { - return u32(mesh.local_triangle_indices[this.triangle_offset + i]); - } - - // Returns index to an actual vertex. - public func vertex(in Mesh mesh, u32 index) -> u32 { - return mesh.indices[this.index_offset + index]; + let local_triangle_index = u32(mesh.local_triangle_indices[this.triangle_offset + i]); + return mesh.indirect_vertex_indices[this.vertex_offset + local_triangle_index]; } // Returns position of a vertex. 
- public func position(in Mesh mesh, u32 vertex) -> f32x3 { - return mesh.vertex_positions[this.vertex_offset + vertex]; + public func position(in Mesh mesh, u32 index) -> f32x3 { + return mesh.vertex_positions[index]; } - public func tex_coord(in Mesh mesh, u32 vertex) -> f32x2 { + public func tex_coord(in Mesh mesh, u32 index) -> f32x2 { if (mesh.texture_coords == nullptr) { return {}; } - return mesh.texture_coords[this.vertex_offset + vertex]; + return mesh.texture_coords[index]; } // ---------------------------------------------------------- - public func indices(in Mesh mesh, u32 i) -> Triangle { - return { + public func indices(in Mesh mesh, u32 i) -> u32x3 { + let local_triangle_indices = u32x3( u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 0]), u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 1]), u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 2]), - }; - } + ); - public func vertices(in Mesh mesh, in Triangle indices) -> u32x3 { - return { mesh.indices[this.index_offset + indices.x], - mesh.indices[this.index_offset + indices.y], - mesh.indices[this.index_offset + indices.z] }; + return { mesh.indirect_vertex_indices[this.vertex_offset + local_triangle_indices.x], + mesh.indirect_vertex_indices[this.vertex_offset + local_triangle_indices.y], + mesh.indirect_vertex_indices[this.vertex_offset + local_triangle_indices.z] }; } - public func positions(in Mesh mesh, in u32x3 vertices) -> f32x3x3 { - return { mesh.vertex_positions[this.vertex_offset + vertices.x], - mesh.vertex_positions[this.vertex_offset + vertices.y], - mesh.vertex_positions[this.vertex_offset + vertices.z] }; + public func positions(in Mesh mesh, in u32x3 indices) -> f32x3x3 { + return { mesh.vertex_positions[indices.x], + mesh.vertex_positions[indices.y], + mesh.vertex_positions[indices.z] }; } - public func normals(in Mesh mesh, in u32x3 vertices) -> f32x3x3 { - return { mesh.vertex_normals[this.vertex_offset + vertices.x], - 
mesh.vertex_normals[this.vertex_offset + vertices.y], - mesh.vertex_normals[this.vertex_offset + vertices.z] }; + public func normals(in Mesh mesh, in u32x3 indices) -> f32x3x3 { + return { mesh.vertex_normals[indices.x], + mesh.vertex_normals[indices.y], + mesh.vertex_normals[indices.z] }; } - public func tex_coords(in Mesh mesh, in u32x3 vertices) -> f32x2x3 { + public func tex_coords(in Mesh mesh, in u32x3 indices) -> f32x2x3 { if (mesh.texture_coords == nullptr) { return {}; } - return { mesh.texture_coords[this.vertex_offset + vertices.x], - mesh.texture_coords[this.vertex_offset + vertices.y], - mesh.texture_coords[this.vertex_offset + vertices.z] }; + return { mesh.texture_coords[indices.x], + mesh.texture_coords[indices.y], + mesh.texture_coords[indices.z] }; } }; @@ -278,6 +271,7 @@ public struct Mesh { public Meshlet *meshlets = nullptr; public MeshletBounds *meshlet_bounds = nullptr; public u8 *local_triangle_indices = nullptr; + public u32 *indirect_vertex_indices = nullptr; }; public struct Light { diff --git a/Lorr/Engine/Resources/shaders/std/color.slang b/Lorr/Engine/Resources/shaders/std/color.slang index 7eef8515..95fb6c1b 100644 --- a/Lorr/Engine/Resources/shaders/std/color.slang +++ b/Lorr/Engine/Resources/shaders/std/color.slang @@ -23,7 +23,7 @@ public func rec2020_to_xyz(f32x3 color) -> f32x3 { f32x3x3 mat = { 0.636958, 0.1446169, 0.168881, 0.2627002, 0.6779981, 0.0593017, - 0.0, 0.0280727, 1.0609851 + 0.0, 0.0280727, 1.0609851 }; return mul(mat, color); } diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 746637a4..52ee87ec 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -150,13 +150,6 @@ struct Material { alignas(4) u32 occlusion_image_index = ~0_u32; }; -struct Meshlet { - alignas(4) u32 vertex_offset = 0; - alignas(4) u32 index_offset = 0; - alignas(4) u32 triangle_offset = 0; - alignas(4) u32 triangle_count = 0; -}; - struct MeshletBounds { alignas(4) glm::vec3 aabb_min = 
{}; alignas(4) glm::vec3 aabb_max = {}; @@ -177,6 +170,7 @@ struct Mesh { alignas(8) u64 meshlets = 0; alignas(8) u64 meshlet_bounds = 0; alignas(8) u64 local_triangle_indices = 0; + alignas(8) u64 indirect_vertex_indices = 0; }; constexpr static u32 HISTOGRAM_THREADS_X = 16; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index f5a91f59..0b2ac724 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -742,6 +742,7 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { gpu_mesh.vertex_normals = model->vertex_normals.device_address(); gpu_mesh.texture_coords = model->texture_coords.device_address(); gpu_mesh.local_triangle_indices = model->local_triangle_indices.device_address(); + gpu_mesh.indirect_vertex_indices = model->indirect_vertex_indices.device_address(); gpu_mesh.meshlet_bounds = model->meshlet_bounds.device_address(); gpu_mesh.meshlets = model->meshlets.device_address(); diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 49bd448f..f35fdc25 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -859,9 +859,8 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: VUK_IA(vuk::eColorRW) normal, VUK_IA(vuk::eColorRW) emissive, VUK_IA(vuk::eColorRW) metallic_roughness_occlusion, - VUK_IA(vuk::eFragmentSampled) visbuffer, + VUK_IA(vuk::eFragmentRead) visbuffer, VUK_BA(vuk::eFragmentRead) camera, - VUK_BA(vuk::eFragmentRead) visible_meshlet_instances_indices, VUK_BA(vuk::eFragmentRead) meshlet_instances, VUK_BA(vuk::eFragmentRead) meshes, VUK_BA(vuk::eFragmentRead) transforms, @@ -878,14 +877,13 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor) .set_viewport(0, vuk::Rect2D::framebuffer()) .set_scissor(0, vuk::Rect2D::framebuffer()) + .bind_persistent(1, *descriptor_set) 
.bind_image(0, 0, visbuffer) .bind_buffer(0, 1, camera) - .bind_buffer(0, 2, visible_meshlet_instances_indices) - .bind_buffer(0, 3, meshlet_instances) - .bind_buffer(0, 4, meshes) - .bind_buffer(0, 5, transforms) - .bind_buffer(0, 6, materials) - .bind_persistent(1, *descriptor_set) + .bind_buffer(0, 2, meshlet_instances) + .bind_buffer(0, 3, meshes) + .bind_buffer(0, 4, transforms) + .bind_buffer(0, 5, materials) .draw(3, 1, 0, 1); return std::make_tuple( @@ -895,7 +893,6 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: metallic_roughness_occlusion, visbuffer, camera, - visible_meshlet_instances_indices, meshlet_instances, meshes, transforms @@ -946,7 +943,6 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: metallic_roughness_occlusion_attachment, visbuffer_attachment, camera_buffer, - visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, meshes_buffer, transforms_buffer @@ -958,7 +954,6 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: std::move(metallic_roughness_occlusion_attachment), std::move(visbuffer_attachment), std::move(camera_buffer), - std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), std::move(meshes_buffer), std::move(transforms_buffer), diff --git a/xmake/packages.lua b/xmake/packages.lua index 0c84ea51..a9732bcb 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -60,8 +60,6 @@ add_requires("vuk 2025.06.15", { configs = { }, debug = is_mode("debug") }) add_requires("meshoptimizer v0.22") -add_requires("ktx v4.4.0", { - -- debug = is_mode("debug") -}) +add_requires("ktx v4.4.0") add_requires("svector v1.0.3") From 1b199eea4107cd8295bfe81e16e28f2b8a1ba178 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Thu, 24 Jul 2025 00:59:27 +0300 Subject: [PATCH 02/16] improve gpu meshes --- Lorr/Engine/Asset/Asset.cc | 112 ++++++++++-------- 
Lorr/Engine/Asset/Model.hh | 9 +- .../shaders/passes/visbuffer_decode.slang | 2 +- .../shaders/passes/visbuffer_encode.slang | 2 +- Lorr/Engine/Resources/shaders/scene.slang | 3 +- Lorr/Engine/Scene/GPUScene.hh | 3 +- Lorr/Engine/Scene/Scene.cc | 30 ++--- 7 files changed, 79 insertions(+), 82 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index b894c4e0..9be8d800 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -680,7 +680,6 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto primitive_index = info->model->primitives.size(); auto &primitive = info->model->primitives.emplace_back(); auto *material_asset = app.asset_man.get_asset(info->model->materials[material_index]); - auto global_material_index = SlotMap_decode_id(material_asset->material_id).index; info->vertex_positions.resize(info->vertex_positions.size() + vertex_count); info->vertex_normals.resize(info->vertex_normals.size() + vertex_count); @@ -688,7 +687,7 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { info->indices.resize(info->indices.size() + index_count); mesh.primitive_indices.push_back(primitive_index); - primitive.material_index = global_material_index; + primitive.material_id = material_asset->material_id; primitive.vertex_offset = vertex_offset; primitive.vertex_count = vertex_count; primitive.index_offset = index_offset; @@ -749,21 +748,14 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto model_normals = std::move(gltf_callbacks.vertex_normals); auto model_texcoords = std::move(gltf_callbacks.vertex_texcoords); - auto processed_indices = std::vector(); - auto processed_vertices = std::vector(); - auto processed_normals = std::vector(); - auto processed_texcoords = std::vector(); - auto processed_meshlets = std::vector(); - auto processed_meshlet_bounds = std::vector(); - auto processed_indirect_vertex_indices = std::vector(); - auto processed_local_triangle_indices = std::vector(); - + auto 
&transfer_man = impl->device->transfer_man(); for (const auto &gltf_mesh : model->meshes) { for (auto primitive_index : gltf_mesh.primitive_indices) { ZoneNamedN(z, "GPU Meshlet Generation", true); auto &primitive = model->primitives[primitive_index]; - auto meshlet_offset = processed_meshlets.size(); + auto &gpu_mesh = model->gpu_meshes.emplace_back(); + auto &gpu_mesh_buffer = model->gpu_mesh_buffers.emplace_back(); auto indices = std::vector(); auto vertices = std::vector(); @@ -843,46 +835,62 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { meshlet_aabb.aabb_max = meshlet_bb_max; } - primitive.meshlet_count = static_cast(meshlet_count); - primitive.meshlet_offset = static_cast(meshlet_offset); - - std::ranges::move(indices, std::back_inserter(processed_indices)); - std::ranges::move(vertices, std::back_inserter(processed_vertices)); - std::ranges::move(normals, std::back_inserter(processed_normals)); - std::ranges::move(texcoords, std::back_inserter(processed_texcoords)); - std::ranges::move(meshlets, std::back_inserter(processed_meshlets)); - std::ranges::move(meshlet_bounds, std::back_inserter(processed_meshlet_bounds)); - std::ranges::move(local_triangle_indices, std::back_inserter(processed_local_triangle_indices)); - std::ranges::move(indirect_vertex_indices, std::back_inserter(processed_indirect_vertex_indices)); - } - } - - auto &transfer_man = impl->device->transfer_man(); - model->indices = Buffer::create(*impl->device, ls::size_bytes(processed_indices)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_indices), model->indices)); - - model->vertex_positions = Buffer::create(*impl->device, ls::size_bytes(processed_vertices)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_vertices), model->vertex_positions)); + auto upload_size = 0 // + + ls::size_bytes(indices) // + + ls::size_bytes(vertices) // + + ls::size_bytes(normals) // + + ls::size_bytes(texcoords) // + + 
ls::size_bytes(meshlets) // + + ls::size_bytes(meshlet_bounds) // + + ls::size_bytes(local_triangle_indices) // + + ls::size_bytes(indirect_vertex_indices); + gpu_mesh_buffer = Buffer::create(*impl->device, upload_size, vuk::MemoryUsage::eGPUonly).value(); + auto gpu_mesh_bda = gpu_mesh_buffer.device_address(); + + auto cpu_mesh_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, upload_size); + auto cpu_mesh_ptr = reinterpret_cast(cpu_mesh_buffer->mapped_ptr); + auto upload_offset = 0_u64; + + gpu_mesh.indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, indices.data(), ls::size_bytes(indices)); + upload_offset += ls::size_bytes(indices); + + gpu_mesh.vertex_positions = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, vertices.data(), ls::size_bytes(vertices)); + upload_offset += ls::size_bytes(vertices); + + gpu_mesh.vertex_normals = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, normals.data(), ls::size_bytes(normals)); + upload_offset += ls::size_bytes(normals); + + if (!texcoords.empty()) { + gpu_mesh.texture_coords = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, texcoords.data(), ls::size_bytes(texcoords)); + upload_offset += ls::size_bytes(texcoords); + } - model->vertex_normals = Buffer::create(*impl->device, ls::size_bytes(processed_normals)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_normals), model->vertex_normals)); + gpu_mesh.meshlets = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, meshlets.data(), ls::size_bytes(meshlets)); + upload_offset += ls::size_bytes(meshlets); - if (!processed_texcoords.empty()) { - model->texture_coords = Buffer::create(*impl->device, ls::size_bytes(processed_texcoords)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_texcoords), model->texture_coords)); - } + gpu_mesh.meshlet_bounds = gpu_mesh_bda + 
upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, meshlet_bounds.data(), ls::size_bytes(meshlet_bounds)); + upload_offset += ls::size_bytes(meshlet_bounds); - model->meshlets = Buffer::create(*impl->device, ls::size_bytes(processed_meshlets)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_meshlets), model->meshlets)); + gpu_mesh.local_triangle_indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, local_triangle_indices.data(), ls::size_bytes(local_triangle_indices)); + upload_offset += ls::size_bytes(local_triangle_indices); - model->meshlet_bounds = Buffer::create(*impl->device, ls::size_bytes(processed_meshlet_bounds)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_meshlet_bounds), model->meshlet_bounds)); + gpu_mesh.indirect_vertex_indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, indirect_vertex_indices.data(), ls::size_bytes(indirect_vertex_indices)); + upload_offset += ls::size_bytes(indirect_vertex_indices); - model->local_triangle_indices = Buffer::create(*impl->device, ls::size_bytes(processed_local_triangle_indices)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_local_triangle_indices), model->local_triangle_indices)); + gpu_mesh.material_index = SlotMap_decode_id(primitive.material_id).index; + gpu_mesh.meshlet_count = meshlet_count; - model->indirect_vertex_indices = Buffer::create(*impl->device, ls::size_bytes(processed_indirect_vertex_indices)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(processed_indirect_vertex_indices), model->indirect_vertex_indices)); + transfer_man.wait_on(std::move(transfer_man.upload_staging(std::move(cpu_mesh_buffer), gpu_mesh_buffer))); + } + } return true; } @@ -1574,12 +1582,12 @@ auto AssetManager::get_materials_buffer() -> vuk::Value { auto occlusion_image_index = uuid_to_index(material->occlusion_texture); auto flags = 
GPU::MaterialFlag::None; - flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; - flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; - flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; - flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; - flags |= occlusion_image_index.has_value() ? GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; - //flags |= GPU::MaterialFlag::NormalFlipY; + // flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; + // flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; + // flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; + // flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; + // flags |= occlusion_image_index.has_value() ? 
GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; + // flags |= GPU::MaterialFlag::NormalFlipY; return { .albedo_color = material->albedo_color, diff --git a/Lorr/Engine/Asset/Model.hh b/Lorr/Engine/Asset/Model.hh index aceb8ba5..6ad32dc7 100644 --- a/Lorr/Engine/Asset/Model.hh +++ b/Lorr/Engine/Asset/Model.hh @@ -4,6 +4,7 @@ #include "Engine/Asset/UUID.hh" #include "Engine/Graphics/Vulkan.hh" +#include "Engine/Scene/GPUScene.hh" namespace lr { struct TextureSamplerInfo { @@ -65,10 +66,7 @@ struct Model { using Index = u32; struct Primitive { - u32 material_index = 0; - u32 meshlet_count = 0; - u32 meshlet_offset = 0; - u32 local_triangle_indices_offset = 0; + MaterialID material_id = MaterialID::Invalid; u32 vertex_count = 0; u32 vertex_offset = 0; u32 index_count = 0; @@ -101,6 +99,9 @@ struct Model { std::vector nodes = {}; std::vector scenes = {}; + std::vector gpu_meshes = {}; + std::vector gpu_mesh_buffers = {}; + usize default_scene_index = 0; // TODO: Make this one buffer diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index 662ca62e..2d7d11bb 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -98,8 +98,8 @@ func fs_main(VertexOutput input) -> FragmentOutput { let meshlet_instance_index = vis.meshlet_instance_index; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; let mesh = params.meshes[meshlet_instance.mesh_index]; + let material = params.materials[mesh.material_index]; let transform = params.transforms[meshlet_instance.transform_index]; - let material = params.materials[meshlet_instance.material_index]; let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; let indices = meshlet.indices(mesh, vis.triangle_index); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang 
b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index a00dfa83..4174ae27 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -46,7 +46,7 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { output.tex_coord = tex_coord; output.meshlet_instance_index = meshlet_instance_index; output.triangle_index = vis.triangle_index / 3; - output.material_index = meshlet_instance.material_index; + output.material_index = mesh.material_index; return output; } diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 6a38b0b0..91455748 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -258,7 +258,6 @@ public struct MeshletBounds { public struct MeshletInstance { public u32 mesh_index = 0; - public u32 material_index = 0; public u32 transform_index = 0; public u32 meshlet_index = 0; }; @@ -272,6 +271,8 @@ public struct Mesh { public MeshletBounds *meshlet_bounds = nullptr; public u8 *local_triangle_indices = nullptr; public u32 *indirect_vertex_indices = nullptr; + public u32 meshlet_count = 0; + public u32 material_index = 0; }; public struct Light { diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 52ee87ec..20e59559 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -157,7 +157,6 @@ struct MeshletBounds { struct MeshletInstance { alignas(4) u32 mesh_index = 0; - alignas(4) u32 material_index = 0; alignas(4) u32 transform_index = 0; alignas(4) u32 meshlet_index = 0; }; @@ -171,6 +170,8 @@ struct Mesh { alignas(8) u64 meshlet_bounds = 0; alignas(8) u64 local_triangle_indices = 0; alignas(8) u64 indirect_vertex_indices = 0; + alignas(4) u32 meshlet_count = 0; + alignas(4) u32 material_index = 0; }; constexpr static u32 HISTOGRAM_THREADS_X = 16; diff --git a/Lorr/Engine/Scene/Scene.cc 
b/Lorr/Engine/Scene/Scene.cc index 0b2ac724..649afb5f 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -732,31 +732,17 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { auto *model = app.asset_man.get_model(rendering_mesh.n0); - const auto &mesh = model->meshes[rendering_mesh.n1]; - - // ── PER MESH INFORMATION ──────────────────────────────────────────── - auto mesh_offset = gpu_meshes.size(); - auto &gpu_mesh = gpu_meshes.emplace_back(); - gpu_mesh.indices = model->indices.device_address(); - gpu_mesh.vertex_positions = model->vertex_positions.device_address(); - gpu_mesh.vertex_normals = model->vertex_normals.device_address(); - gpu_mesh.texture_coords = model->texture_coords.device_address(); - gpu_mesh.local_triangle_indices = model->local_triangle_indices.device_address(); - gpu_mesh.indirect_vertex_indices = model->indirect_vertex_indices.device_address(); - gpu_mesh.meshlet_bounds = model->meshlet_bounds.device_address(); - gpu_mesh.meshlets = model->meshlets.device_address(); + const auto &gpu_mesh = model->gpu_meshes[rendering_mesh.n1]; + auto mesh_index = static_cast(gpu_meshes.size()); + gpu_meshes.emplace_back(gpu_mesh); // ── INSTANCING ────────────────────────────────────────────────────── for (const auto transform_id : transform_ids) { - for (const auto primitive_index : mesh.primitive_indices) { - auto &primitive = model->primitives[primitive_index]; - for (u32 meshlet_index = 0; meshlet_index < primitive.meshlet_count; meshlet_index++) { - auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); - meshlet_instance.mesh_index = mesh_offset; - meshlet_instance.material_index = primitive.material_index; - meshlet_instance.transform_index = SlotMap_decode_id(transform_id).index; - meshlet_instance.meshlet_index = meshlet_index + primitive.meshlet_offset; - } + for (u32 meshlet_index = 0; meshlet_index < gpu_mesh.meshlet_count; 
meshlet_index++) { + auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); + meshlet_instance.mesh_index = mesh_index; + meshlet_instance.transform_index = SlotMap_decode_id(transform_id).index; + meshlet_instance.meshlet_index = meshlet_index; } } } From de901fc11cbdfeac58d49c268f3d7905b16fdcc3 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Thu, 24 Jul 2025 12:07:05 +0300 Subject: [PATCH 03/16] add use llvmpipe option --- Lorr/Engine/Asset/Asset.cc | 21 ++++-------- Lorr/Engine/Asset/Model.hh | 10 ------ Lorr/Engine/Graphics/Vulkan/Device.cc | 47 ++++++++++++--------------- Lorr/Engine/xmake.lua | 1 + xmake/options.lua | 6 ++++ 5 files changed, 35 insertions(+), 50 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 9be8d800..2bb2911b 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -909,16 +909,9 @@ auto AssetManager::unload_model(const UUID &uuid) -> bool { this->unload_material(v); } - impl->device->destroy(model->indices.id()); - impl->device->destroy(model->vertex_positions.id()); - impl->device->destroy(model->vertex_normals.id()); - if (model->texture_coords) { - impl->device->destroy(model->texture_coords.id()); + for (const auto &buffer : model->gpu_mesh_buffers) { + impl->device->destroy(buffer.id()); } - impl->device->destroy(model->meshlets.id()); - impl->device->destroy(model->meshlet_bounds.id()); - impl->device->destroy(model->local_triangle_indices.id()); - impl->device->destroy(model->indirect_vertex_indices.id()); impl->models.destroy_slot(asset->model_id); asset->model_id = ModelID::Invalid; @@ -1582,11 +1575,11 @@ auto AssetManager::get_materials_buffer() -> vuk::Value { auto occlusion_image_index = uuid_to_index(material->occlusion_texture); auto flags = GPU::MaterialFlag::None; - // flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; - // flags |= normal_image_index.has_value() ? 
GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; - // flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; - // flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; - // flags |= occlusion_image_index.has_value() ? GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; + flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; + flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; + flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; + flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; + flags |= occlusion_image_index.has_value() ? GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; // flags |= GPU::MaterialFlag::NormalFlipY; return { diff --git a/Lorr/Engine/Asset/Model.hh b/Lorr/Engine/Asset/Model.hh index 6ad32dc7..8c3c1888 100644 --- a/Lorr/Engine/Asset/Model.hh +++ b/Lorr/Engine/Asset/Model.hh @@ -103,15 +103,5 @@ struct Model { std::vector gpu_mesh_buffers = {}; usize default_scene_index = 0; - - // TODO: Make this one buffer - Buffer indices = {}; - Buffer vertex_positions = {}; - Buffer vertex_normals = {}; - Buffer texture_coords = {}; - Buffer meshlets = {}; - Buffer meshlet_bounds = {}; - Buffer local_triangle_indices = {}; - Buffer indirect_vertex_indices = {}; }; } // namespace lr diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 170039b9..773c7c93 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -78,7 +78,11 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected device_extensions; device_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); @@ -88,7 +92,7 
@@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expected std::expected()); self.runtime.emplace( diff --git a/Lorr/Engine/xmake.lua b/Lorr/Engine/xmake.lua index 8038a0ef..43deef24 100755 --- a/Lorr/Engine/xmake.lua +++ b/Lorr/Engine/xmake.lua @@ -22,6 +22,7 @@ target("Lorr") }) add_options("profile") + add_options("use_llvmpipe") add_deps( "ls", diff --git a/xmake/options.lua b/xmake/options.lua index 3158a491..3d28b3a7 100755 --- a/xmake/options.lua +++ b/xmake/options.lua @@ -7,3 +7,9 @@ option("profile") end option_end() +option("use_llvmpipe") + set_default(false) + set_description("Select CPU graphics device.") + add_defines("LR_USE_LLVMPIPE=1", { public = true }) +option_end() + From 2574ab46aa957b5e7d97d3dd26e838ec492d3fce Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 25 Jul 2025 00:58:58 +0300 Subject: [PATCH 04/16] fix mesh instancing --- Lorr/Engine/Asset/Asset.cc | 25 +++++----- Lorr/Engine/Asset/Model.hh | 1 + Lorr/Engine/Graphics/Vulkan/Device.cc | 4 +- .../shaders/passes/cull_meshlets.slang | 18 ++++--- .../shaders/passes/cull_triangles.slang | 11 ++--- .../shaders/passes/editor_mousepick.slang | 8 +--- .../Resources/shaders/passes/visbuffer.slang | 44 +---------------- .../shaders/passes/visbuffer_decode.slang | 6 +-- .../shaders/passes/visbuffer_encode.slang | 16 +++---- .../shaders/passes/visbuffer_merge.slang | 37 --------------- Lorr/Engine/Resources/shaders/scene.slang | 3 +- Lorr/Engine/Scene/GPUScene.hh | 3 +- Lorr/Engine/Scene/Scene.cc | 23 +++++---- Lorr/Engine/Scene/SceneRenderer.cc | 47 +++++++------------ 14 files changed, 76 insertions(+), 170 deletions(-) delete mode 100644 Lorr/Engine/Resources/shaders/passes/visbuffer_merge.slang diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 2bb2911b..64bdae7c 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -678,6 +678,8 @@ auto AssetManager::load_model(const UUID 
&uuid) -> bool { auto &mesh = info->model->meshes[mesh_index]; auto primitive_index = info->model->primitives.size(); + info->model->gpu_meshes.emplace_back(); + info->model->gpu_mesh_buffers.emplace_back(); auto &primitive = info->model->primitives.emplace_back(); auto *material_asset = app.asset_man.get_asset(info->model->materials[material_index]); @@ -754,8 +756,8 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { ZoneNamedN(z, "GPU Meshlet Generation", true); auto &primitive = model->primitives[primitive_index]; - auto &gpu_mesh = model->gpu_meshes.emplace_back(); - auto &gpu_mesh_buffer = model->gpu_mesh_buffers.emplace_back(); + auto &gpu_mesh = model->gpu_meshes[primitive_index]; + auto &gpu_mesh_buffer = model->gpu_mesh_buffers[primitive_index]; auto indices = std::vector(); auto vertices = std::vector(); @@ -764,10 +766,10 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { { ZoneNamedN(z2, "Remap geometry", true); - auto raw_indices = std::span(model_indices.data() + primitive.index_offset, primitive.index_count); - auto raw_vertices = std::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); - auto raw_normals = std::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); - auto raw_texcoords = std::span(model_texcoords.data() + primitive.vertex_offset, primitive.vertex_count); + auto raw_indices = ls::span(model_indices.data() + primitive.index_offset, primitive.index_count); + auto raw_vertices = ls::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); + auto raw_normals = ls::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); + auto raw_texcoords = ls::span(model_texcoords.data() + primitive.vertex_offset, primitive.vertex_count); // clang-format off auto remapped_vertices = std::vector(raw_vertices.size()); @@ -779,8 +781,10 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { normals.resize(vertex_count); 
meshopt_remapVertexBuffer(normals.data(), raw_normals.data(), raw_normals.size(), sizeof(glm::vec3), remapped_vertices.data()); - texcoords.resize(vertex_count); - meshopt_remapVertexBuffer(texcoords.data(), raw_texcoords.data(), raw_texcoords.size(), sizeof(glm::vec2), remapped_vertices.data()); + if (!raw_texcoords.empty()) { + texcoords.resize(vertex_count); + meshopt_remapVertexBuffer(texcoords.data(), raw_texcoords.data(), raw_texcoords.size(), sizeof(glm::vec2), remapped_vertices.data()); + } indices.resize(raw_indices.size()); meshopt_remapIndexBuffer(indices.data(), raw_indices.data(), primitive.index_count, remapped_vertices.data()); @@ -885,10 +889,9 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { std::memcpy(cpu_mesh_ptr + upload_offset, indirect_vertex_indices.data(), ls::size_bytes(indirect_vertex_indices)); upload_offset += ls::size_bytes(indirect_vertex_indices); - gpu_mesh.material_index = SlotMap_decode_id(primitive.material_id).index; - gpu_mesh.meshlet_count = meshlet_count; - transfer_man.wait_on(std::move(transfer_man.upload_staging(std::move(cpu_mesh_buffer), gpu_mesh_buffer))); + + primitive.meshlet_count = meshlet_count; } } diff --git a/Lorr/Engine/Asset/Model.hh b/Lorr/Engine/Asset/Model.hh index 8c3c1888..a7ccf2ab 100644 --- a/Lorr/Engine/Asset/Model.hh +++ b/Lorr/Engine/Asset/Model.hh @@ -67,6 +67,7 @@ struct Model { struct Primitive { MaterialID material_id = MaterialID::Invalid; + u32 meshlet_count = 0; u32 vertex_count = 0; u32 vertex_offset = 0; u32 index_count = 0; diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 773c7c93..7947871e 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -60,7 +60,7 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expectedprojection_view_mat, transform.world); + model_view_proj_shared = mul(params.camera.projection_view_mat, transform.world); } 
std::control_barrier(std::memory_order_acq_rel); @@ -151,8 +150,8 @@ func cs_main( if (triangle_passed) { let index_offset = base_index_shared + active_triangle_index * 3; - params.reordered_indices[index_offset + 0] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); - params.reordered_indices[index_offset + 1] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); - params.reordered_indices[index_offset + 2] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & MESHLET_PRIMITIVE_MASK); + params.reordered_indices[index_offset + 0] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); + params.reordered_indices[index_offset + 1] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); + params.reordered_indices[index_offset + 2] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & MESHLET_PRIMITIVE_MASK); } } diff --git a/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang b/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang index 3dfb215c..a56a3c89 100644 --- a/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang +++ b/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang @@ -10,9 +10,6 @@ import passes.visbuffer; Image2D visbuffer_data; [[vk::binding(1, 0)]] -StructuredBuffer visible_meshlet_instances_indices; - -[[vk::binding(2, 0)]] StructuredBuffer meshlet_instances; struct PushConstants { @@ -31,8 +28,7 @@ func cs_main() -> void { return; } - const let vis = VisBufferData(texel); - const u32 meshlet_instance_index = visible_meshlet_instances_indices[vis.meshlet_instance_index]; - const MeshletInstance meshlet_instance = meshlet_instances[meshlet_instance_index]; + let vis = VisBufferData(texel); + let meshlet_instance = meshlet_instances[vis.meshlet_instance_index]; *C.dst = 
meshlet_instance.transform_index; } diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer.slang index 80183ef1..8552fbd1 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer.slang @@ -3,9 +3,6 @@ module visbuffer; import std; import scene; -constexpr static u64 MESHLET_DEPTH_BITS = 32u; -constexpr static u64 MESHLET_DEPTH_MASK = (1u << MESHLET_DEPTH_BITS) - 1u; - constexpr static u32 MESHLET_INSTANCE_ID_BITS = 24u; constexpr static u32 MESHLET_INSTANCE_ID_MASK = (1u << MESHLET_INSTANCE_ID_BITS) - 1u; @@ -31,43 +28,4 @@ public struct VisBufferData { public func encode() -> u32 { return (this.meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | (this.triangle_index & MESHLET_PRIMITIVE_MASK); } -}; - -// NOTE: This version of visibility buffering is not widely available. -// My current idea is to have 2 (R32, D32) attachments and render them -// just normally, and then have a compute pass to merge them into R64 -// image. R64 images are not widely available even with storage only. -// Investigate maintenance8 for D32<->R32 copies. 
- -public struct VisBuffer { - u32 data; - public f32 depth; - - [[mutating]] - public __init(u32 meshlet_instance_index, u32 triangle_index, f32 depth) { - const let data = VisBufferData(meshlet_instance_index, triangle_index); - this.data = data.encode(); - this.depth = depth; - } - - [[mutating]] - public __init(in VisBufferData data, f32 depth) { - this.data = data.encode(); - this.depth = depth; - } - - [[mutating]] - public __init(u64 data) { - this.data = u32(data & MESHLET_DEPTH_MASK); - this.depth = asfloat(u32(data >> MESHLET_DEPTH_BITS)); - } - - public func encode() -> u64 { - return (u64(asuint(this.depth)) << MESHLET_DEPTH_BITS) | u64(this.data); - } - - public func decode_vis() -> VisBufferData { - return VisBufferData(this.data); - }; -}; - +}; \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index 2d7d11bb..dc553bf7 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -92,13 +92,11 @@ func fs_main(VertexOutput input) -> FragmentOutput { discard; } - FragmentOutput output = {}; - let vis = VisBufferData(texel); let meshlet_instance_index = vis.meshlet_instance_index; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; let mesh = params.meshes[meshlet_instance.mesh_index]; - let material = params.materials[mesh.material_index]; + let material = params.materials[meshlet_instance.material_index]; let transform = params.transforms[meshlet_instance.transform_index]; let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; @@ -111,6 +109,8 @@ func fs_main(VertexOutput input) -> FragmentOutput { let deriv = compute_partial_derivatives(world_positions, NDC.xy, params.camera.resolution); let tex_coord_grad = deriv.gradient_of(tex_coords); + FragmentOutput output = {}; + // ALBEDO 
─────────────────────────────────────────────────────────── output.albedo_color = material.sample_albedo_color(tex_coord_grad); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index 4174ae27..28f7af09 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -7,7 +7,6 @@ import passes.visbuffer; struct ShaderParameters { ConstantBuffer camera; - StructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; StructuredBuffer meshes; StructuredBuffer transforms; @@ -28,8 +27,7 @@ struct VertexOutput { [[shader("vertex")]] func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { let vis = VisBufferData(vertex_index); - let meshlet_instance_index = params.visible_meshlet_instances_indices[vis.meshlet_instance_index]; - let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; + let meshlet_instance = params.meshlet_instances[vis.meshlet_instance_index]; let mesh = params.meshes[meshlet_instance.mesh_index]; let transform = params.transforms[meshlet_instance.transform_index]; let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; @@ -44,9 +42,9 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { output.position = clip_pos; output.vertex_pos = vertex_pos; output.tex_coord = tex_coord; - output.meshlet_instance_index = meshlet_instance_index; + output.meshlet_instance_index = vis.meshlet_instance_index; output.triangle_index = vis.triangle_index / 3; - output.material_index = mesh.material_index; + output.material_index = meshlet_instance.material_index; return output; } @@ -59,18 +57,16 @@ func fs_main(VertexOutput input) -> u32 { grad.uv = input.tex_coord; grad.ddx = ddx(input.tex_coord); grad.ddy = ddy(input.tex_coord); - const f32 alpha_color = material.sample_albedo_color(grad).a; + let alpha_color = 
material.sample_albedo_color(grad).a; // We are doing deferred, blend alpha mode is not supported in this pass. - if (alpha_color < clamp(material.alpha_cutoff, 0.001, 1.0) /* && - material.alpha_mode == AlphaMode::Mask*/) - { + if (alpha_color < clamp(material.alpha_cutoff, 0.001, 1.0)) { discard; } } std::atomic_add(params.overdraw[u32x2(input.position.xy)], 1u, std::memory_order_acq_rel, std::MemoryLocation::Image, MemoryScope::QueueFamily); - const let vis = VisBufferData(input.meshlet_instance_index, input.triangle_index); + let vis = VisBufferData(input.meshlet_instance_index, input.triangle_index); return vis.encode(); } diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_merge.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_merge.slang deleted file mode 100644 index caefe597..00000000 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_merge.slang +++ /dev/null @@ -1,37 +0,0 @@ -module visbuffer_merge; - -import std; -import gpu; -import scene; -import passes.visbuffer; - -#include - -[[vk::binding(0, 0)]] -Image2D depth_image; - -[[vk::binding(1, 0)]] -Image2D visbuffer_data_image; - -[[vk::binding(2, 0)]] -StorageImage2D visbuffer_image; - -struct PushConstants { - u32x2 extent; -}; -[[vk::push_constant]] PushConstants C; - -[[shader("compute")]] -[[numthreads(16, 16, 1)]] -func cs_main(u32x2 thread_id : SV_DispatchThreadID) -> void { - if (any(C.extent < thread_id)) { - return; - } - - const f32 depth_texel = depth_image.load(thread_id.xy); - const u32 vis_texel = visbuffer_data_image.load(thread_id.xy); - - const let vis_data = VisBufferData(vis_texel); - const let vis = VisBuffer(vis_data, depth_texel); - visbuffer_image[thread_id.xy] = vis.encode(); -} diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 91455748..9e38944a 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -259,6 +259,7 @@ public struct MeshletBounds { public 
struct MeshletInstance { public u32 mesh_index = 0; public u32 transform_index = 0; + public u32 material_index = 0; public u32 meshlet_index = 0; }; @@ -271,8 +272,6 @@ public struct Mesh { public MeshletBounds *meshlet_bounds = nullptr; public u8 *local_triangle_indices = nullptr; public u32 *indirect_vertex_indices = nullptr; - public u32 meshlet_count = 0; - public u32 material_index = 0; }; public struct Light { diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 20e59559..1c98d924 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -158,6 +158,7 @@ struct MeshletBounds { struct MeshletInstance { alignas(4) u32 mesh_index = 0; alignas(4) u32 transform_index = 0; + alignas(4) u32 material_index = 0; alignas(4) u32 meshlet_index = 0; }; @@ -170,8 +171,6 @@ struct Mesh { alignas(8) u64 meshlet_bounds = 0; alignas(8) u64 local_triangle_indices = 0; alignas(8) u64 indirect_vertex_indices = 0; - alignas(4) u32 meshlet_count = 0; - alignas(4) u32 material_index = 0; }; constexpr static u32 HISTOGRAM_THREADS_X = 16; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 649afb5f..0f9a6bf8 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -732,17 +732,24 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { auto *model = app.asset_man.get_model(rendering_mesh.n0); - const auto &gpu_mesh = model->gpu_meshes[rendering_mesh.n1]; - auto mesh_index = static_cast(gpu_meshes.size()); - gpu_meshes.emplace_back(gpu_mesh); + const auto &mesh = model->meshes[rendering_mesh.n1]; // ── INSTANCING ────────────────────────────────────────────────────── for (const auto transform_id : transform_ids) { - for (u32 meshlet_index = 0; meshlet_index < gpu_mesh.meshlet_count; meshlet_index++) { - auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); - meshlet_instance.mesh_index = mesh_index; 
- meshlet_instance.transform_index = SlotMap_decode_id(transform_id).index; - meshlet_instance.meshlet_index = meshlet_index; + for (auto primitive_index : mesh.primitive_indices) { + const auto &primitive = model->primitives[primitive_index]; + const auto &gpu_mesh = model->gpu_meshes[primitive_index]; + + auto mesh_index = static_cast(gpu_meshes.size()); + gpu_meshes.push_back(gpu_mesh); + + for (u32 meshlet_index = 0; meshlet_index < primitive.meshlet_count; meshlet_index++) { + auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); + meshlet_instance.mesh_index = mesh_index; + meshlet_instance.transform_index = SlotMap_decode_id(transform_id).index; + meshlet_instance.material_index = SlotMap_decode_id(primitive.material_id).index; + meshlet_instance.meshlet_index = meshlet_index; + } } } } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index f35fdc25..90622226 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -536,14 +536,14 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: "vis cull meshlets", [meshlet_instance_count = self.meshlet_instance_count, cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, - VUK_BA(vuk::eComputeWrite) cull_triangles_cmd, + VUK_BA(vuk::eComputeRW) cull_triangles_cmd, VUK_BA(vuk::eComputeRead) camera, VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, VUK_IA(vuk::eComputeRead) hiz, - VUK_BA(vuk::eComputeWrite) debug_drawer + VUK_BA(vuk::eComputeRW) debug_drawer ) { cmd_list // .bind_compute_pipeline("passes.cull_meshlets") @@ -558,6 +558,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .bind_buffer(0, 8, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(meshlet_instance_count, cull_flags)) 
.dispatch((meshlet_instance_count + Model::MAX_MESHLET_INDICES - 1) / Model::MAX_MESHLET_INDICES); + return std::make_tuple( cull_triangles_cmd, camera, @@ -602,8 +603,8 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: [cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, - VUK_BA(vuk::eComputeWrite) draw_indexed_cmd, - VUK_BA(vuk::eComputeWrite) camera, + VUK_BA(vuk::eComputeRW) draw_indexed_cmd, + VUK_BA(vuk::eComputeRead) camera, VUK_BA(vuk::eComputeRead) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, VUK_BA(vuk::eComputeRead) meshes, @@ -621,6 +622,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .bind_buffer(0, 6, reordered_indices) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) .dispatch_indirect(cull_triangles_cmd); + return std::make_tuple( draw_indexed_cmd, camera, @@ -707,10 +709,9 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) triangle_indirect, VUK_BA(vuk::eIndexRead) index_buffer, - VUK_IA(vuk::eColorWrite) visbuffer, + VUK_IA(vuk::eColorRW) visbuffer, VUK_IA(vuk::eDepthStencilRW) depth, VUK_BA(vuk::eVertexRead) camera, - VUK_BA(vuk::eVertexRead) visible_meshlet_instances_indices, VUK_BA(vuk::eVertexRead) meshlet_instances, VUK_BA(vuk::eVertexRead) transforms, VUK_BA(vuk::eVertexRead) meshes, @@ -727,25 +728,15 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .set_scissor(0, vuk::Rect2D::framebuffer()) .bind_persistent(1, *descriptor_set) .bind_buffer(0, 0, camera) - .bind_buffer(0, 1, visible_meshlet_instances_indices) - .bind_buffer(0, 2, meshlet_instances) - .bind_buffer(0, 3, meshes) - .bind_buffer(0, 4, transforms) - .bind_buffer(0, 5, materials) - .bind_image(0, 6, overdraw) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, meshes) 
+ .bind_buffer(0, 3, transforms) + .bind_buffer(0, 4, materials) + .bind_image(0, 5, overdraw) .bind_index_buffer(index_buffer, vuk::IndexType::eUint32) .draw_indexed_indirect(1, triangle_indirect); - return std::make_tuple( - visbuffer, - depth, - camera, - visible_meshlet_instances_indices, - meshlet_instances, - transforms, - meshes, - materials, - overdraw - ); + + return std::make_tuple(visbuffer, depth, camera, meshlet_instances, transforms, meshes, materials, overdraw); } ); @@ -753,7 +744,6 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: visbuffer_attachment, depth_attachment, camera_buffer, - visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, transforms_buffer, meshes_buffer, @@ -766,7 +756,6 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: std::move(visbuffer_attachment), std::move(depth_attachment), std::move(camera_buffer), - std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), std::move(transforms_buffer), std::move(meshes_buffer), @@ -781,15 +770,13 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: [picking_texel = *info.picking_texel]( vuk::CommandBuffer &cmd_list, VUK_IA(vuk::eComputeSampled) visbuffer, - VUK_BA(vuk::eComputeRead) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, VUK_BA(vuk::eComputeWrite) picked_transform_index_buffer ) { cmd_list // .bind_compute_pipeline("passes.editor_mousepick") .bind_image(0, 0, visbuffer) - .bind_buffer(0, 1, visible_meshlet_instances_indices) - .bind_buffer(0, 2, meshlet_instances) + .bind_buffer(0, 1, meshlet_instances) .push_constants( vuk::ShaderStageFlagBits::eCompute, 0, @@ -802,8 +789,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: ); auto picking_texel_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUtoCPU, sizeof(u32)); - auto picked_texel = - 
editor_mousepick_pass(visbuffer_attachment, visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, picking_texel_buffer); + auto picked_texel = editor_mousepick_pass(visbuffer_attachment, meshlet_instances_buffer, picking_texel_buffer); vuk::Compiler temp_compiler; picked_texel.wait(self.device->get_allocator(), temp_compiler); @@ -1013,6 +999,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .bind_buffer(0, 10, sun) .bind_buffer(0, 11, camera) .draw(3, 1, 0, 0); + return std::make_tuple(dst, atmosphere, sun, camera, sky_transmittance_lut, sky_multiscatter_lut, depth); } ); From 87a282a35b096544ee28bb1e94e38d573cc76249 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:04:47 +0300 Subject: [PATCH 05/16] prepare for lods --- Lorr/Engine/Asset/Asset.cc | 271 ++++++++++++---------- Lorr/Engine/Resources/shaders/scene.slang | 11 + Lorr/Engine/Scene/GPUScene.hh | 13 ++ Lorr/Engine/Scene/Scene.cc | 9 +- Lorr/Engine/Scene/SceneRenderer.cc | 1 + 5 files changed, 175 insertions(+), 130 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 64bdae7c..749e615b 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -674,12 +674,12 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto *info = static_cast(user_data); if (info->model->meshes.size() <= mesh_index) { info->model->meshes.resize(mesh_index + 1); + info->model->gpu_meshes.resize(mesh_index + 1); + info->model->gpu_mesh_buffers.resize(mesh_index + 1); } auto &mesh = info->model->meshes[mesh_index]; auto primitive_index = info->model->primitives.size(); - info->model->gpu_meshes.emplace_back(); - info->model->gpu_mesh_buffers.emplace_back(); auto &primitive = info->model->primitives.emplace_back(); auto *material_asset = app.asset_man.get_asset(info->model->materials[material_index]); @@ -727,6 +727,8 @@ auto AssetManager::load_model(const UUID &uuid) -> 
bool { return false; } + auto &transfer_man = impl->device->transfer_man(); + // ── SCENE HIERARCHY ───────────────────────────────────────────────── for (const auto &node : gltf_model->nodes) { model->nodes.push_back( @@ -750,149 +752,168 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto model_normals = std::move(gltf_callbacks.vertex_normals); auto model_texcoords = std::move(gltf_callbacks.vertex_texcoords); - auto &transfer_man = impl->device->transfer_man(); - for (const auto &gltf_mesh : model->meshes) { - for (auto primitive_index : gltf_mesh.primitive_indices) { - ZoneNamedN(z, "GPU Meshlet Generation", true); - - auto &primitive = model->primitives[primitive_index]; - auto &gpu_mesh = model->gpu_meshes[primitive_index]; - auto &gpu_mesh_buffer = model->gpu_mesh_buffers[primitive_index]; - - auto indices = std::vector(); - auto vertices = std::vector(); - auto normals = std::vector(); - auto texcoords = std::vector(); - { - ZoneNamedN(z2, "Remap geometry", true); - - auto raw_indices = ls::span(model_indices.data() + primitive.index_offset, primitive.index_count); - auto raw_vertices = ls::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); - auto raw_normals = ls::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); - auto raw_texcoords = ls::span(model_texcoords.data() + primitive.vertex_offset, primitive.vertex_count); + // for each model (aka gltf scene): + // - for each mesh: + // - - for each lod: + // - - - for each primitive: + // - - - - generate lods + // - - - - optimize and remap geometry + // - - - - calculate meshlets and bounds + // + for (const auto &[mesh, gpu_mesh, gpu_mesh_buffer] : std::views::zip(model->meshes, model->gpu_meshes, model->gpu_mesh_buffers)) { + auto mesh_indices = std::vector(); + auto mesh_vertices = std::vector(); + auto mesh_normals = std::vector(); + auto mesh_texcoords = std::vector(); + auto mesh_meshlets = std::vector(); + auto mesh_meshlet_bounds 
= std::vector(); + auto mesh_local_triangle_indices = std::vector(); + auto mesh_indirect_vertex_indices = std::vector(); + + for (auto lod_index = 0_sz; lod_index < GPU::Mesh::MAX_LODS; lod_index++) { + for (auto primitive_index : mesh.primitive_indices) { + ZoneNamedN(z, "GPU Meshlet Generation", true); + + auto &primitive = model->primitives[primitive_index]; + auto primitive_indices = ls::span(model_indices.data() + primitive.index_offset, primitive.index_count); + auto primitive_vertices = ls::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); + auto primitive_normals = ls::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); + auto primitive_texcoords = ls::span(model_texcoords.data() + primitive.vertex_offset, primitive.vertex_count); // clang-format off - auto remapped_vertices = std::vector(raw_vertices.size()); - auto vertex_count = meshopt_optimizeVertexFetchRemap(remapped_vertices.data(), raw_indices.data(), raw_indices.size(), primitive.vertex_count); + auto remapped_vertices = std::vector(primitive_vertices.size()); + auto vertex_count = meshopt_optimizeVertexFetchRemap(remapped_vertices.data(), primitive_indices.data(), primitive_indices.size(), primitive.vertex_count); - vertices.resize(vertex_count); - meshopt_remapVertexBuffer(vertices.data(), raw_vertices.data(), raw_vertices.size(), sizeof(glm::vec3), remapped_vertices.data()); + auto vertices = std::vector(vertex_count); + meshopt_remapVertexBuffer(vertices.data(), primitive_vertices.data(), primitive_vertices.size(), sizeof(glm::vec3), remapped_vertices.data()); - normals.resize(vertex_count); - meshopt_remapVertexBuffer(normals.data(), raw_normals.data(), raw_normals.size(), sizeof(glm::vec3), remapped_vertices.data()); + auto normals = std::vector(vertex_count); + meshopt_remapVertexBuffer(normals.data(), primitive_normals.data(), primitive_normals.size(), sizeof(glm::vec3), remapped_vertices.data()); - if (!raw_texcoords.empty()) { + auto 
texcoords = std::vector(); + if (!primitive_texcoords.empty()) { texcoords.resize(vertex_count); - meshopt_remapVertexBuffer(texcoords.data(), raw_texcoords.data(), raw_texcoords.size(), sizeof(glm::vec2), remapped_vertices.data()); + meshopt_remapVertexBuffer(texcoords.data(), primitive_texcoords.data(), primitive_texcoords.size(), sizeof(glm::vec2), remapped_vertices.data()); } - indices.resize(raw_indices.size()); - meshopt_remapIndexBuffer(indices.data(), raw_indices.data(), primitive.index_count, remapped_vertices.data()); - // clang-format on + auto indices = std::vector(primitive_indices.size()); + meshopt_remapIndexBuffer(indices.data(), primitive_indices.data(), primitive.index_count, remapped_vertices.data()); { - auto optimized_indices = std::vector(raw_indices.size()); + auto optimized_indices = std::vector(primitive_indices.size()); meshopt_optimizeVertexCache(optimized_indices.data(), indices.data(), indices.size(), vertex_count); indices = std::move(optimized_indices); } - } - // Worst case count - auto max_meshlet_count = meshopt_buildMeshletsBound(indices.size(), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES); - auto meshlets = std::vector(max_meshlet_count); - auto indirect_vertex_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_INDICES); - auto local_triangle_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_PRIMITIVES * 3); - - auto meshlet_count = meshopt_buildMeshlets( - meshlets.data(), - indirect_vertex_indices.data(), - local_triangle_indices.data(), - indices.data(), - indices.size(), - reinterpret_cast(vertices.data()), - vertices.size(), - sizeof(glm::vec3), - Model::MAX_MESHLET_INDICES, - Model::MAX_MESHLET_PRIMITIVES, - 0.0 - ); + // Worst case count + auto max_meshlet_count = meshopt_buildMeshletsBound(indices.size(), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES); + auto meshlets = std::vector(max_meshlet_count); + auto indirect_vertex_indices = std::vector(max_meshlet_count * 
Model::MAX_MESHLET_INDICES); + auto local_triangle_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_PRIMITIVES * 3); + + auto meshlet_count = meshopt_buildMeshlets( + meshlets.data(), + indirect_vertex_indices.data(), + local_triangle_indices.data(), + indices.data(), + indices.size(), + reinterpret_cast(vertices.data()), + vertices.size(), + sizeof(glm::vec3), + Model::MAX_MESHLET_INDICES, + Model::MAX_MESHLET_PRIMITIVES, + 0.0 + ); - // Trim meshlets from worst case to current case - meshlets.resize(meshlet_count); - const auto &last_meshlet = meshlets[meshlet_count - 1]; - indirect_vertex_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); - local_triangle_indices.resize(last_meshlet.triangle_offset + ((last_meshlet.triangle_count * 3 + 3) & ~3_u32)); - - auto meshlet_bounds = std::vector(meshlet_count); - for (const auto &[meshlet, meshlet_aabb] : std::views::zip(meshlets, meshlet_bounds)) { - // AABB computation - auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); - auto meshlet_bb_max = glm::vec3(std::numeric_limits::lowest()); - for (u32 i = 0; i < meshlet.triangle_count * 3; i++) { - const auto &tri_pos = - vertices[indirect_vertex_indices[meshlet.vertex_offset + local_triangle_indices[meshlet.triangle_offset + i]]]; - meshlet_bb_min = glm::min(meshlet_bb_min, tri_pos); - meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); + // Trim meshlets from worst case to current case + meshlets.resize(meshlet_count); + const auto &last_meshlet = meshlets[meshlet_count - 1]; + indirect_vertex_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); + local_triangle_indices.resize(last_meshlet.triangle_offset + ((last_meshlet.triangle_count * 3 + 3) & ~3_u32)); + + auto meshlet_bounds = std::vector(meshlet_count); + for (const auto &[meshlet, meshlet_aabb] : std::views::zip(meshlets, meshlet_bounds)) { + // AABB computation + auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); + auto meshlet_bb_max = 
glm::vec3(std::numeric_limits::lowest()); + for (u32 i = 0; i < meshlet.triangle_count * 3; i++) { + const auto &tri_pos = + vertices[indirect_vertex_indices[meshlet.vertex_offset + local_triangle_indices[meshlet.triangle_offset + i]]]; + meshlet_bb_min = glm::min(meshlet_bb_min, tri_pos); + meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); + } + + meshlet.triangle_offset += mesh_local_triangle_indices.size(); + meshlet.vertex_offset += mesh_indirect_vertex_indices.size(); + + meshlet_aabb.aabb_min = meshlet_bb_min; + meshlet_aabb.aabb_max = meshlet_bb_max; } - meshlet_aabb.aabb_min = meshlet_bb_min; - meshlet_aabb.aabb_max = meshlet_bb_max; - } + primitive.meshlet_count = meshlet_count; - auto upload_size = 0 // - + ls::size_bytes(indices) // - + ls::size_bytes(vertices) // - + ls::size_bytes(normals) // - + ls::size_bytes(texcoords) // - + ls::size_bytes(meshlets) // - + ls::size_bytes(meshlet_bounds) // - + ls::size_bytes(local_triangle_indices) // - + ls::size_bytes(indirect_vertex_indices); - gpu_mesh_buffer = Buffer::create(*impl->device, upload_size, vuk::MemoryUsage::eGPUonly).value(); - auto gpu_mesh_bda = gpu_mesh_buffer.device_address(); - - auto cpu_mesh_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, upload_size); - auto cpu_mesh_ptr = reinterpret_cast(cpu_mesh_buffer->mapped_ptr); - auto upload_offset = 0_u64; - - gpu_mesh.indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, indices.data(), ls::size_bytes(indices)); - upload_offset += ls::size_bytes(indices); - - gpu_mesh.vertex_positions = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, vertices.data(), ls::size_bytes(vertices)); - upload_offset += ls::size_bytes(vertices); - - gpu_mesh.vertex_normals = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, normals.data(), ls::size_bytes(normals)); - upload_offset += ls::size_bytes(normals); - - if (!texcoords.empty()) { - gpu_mesh.texture_coords = 
gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, texcoords.data(), ls::size_bytes(texcoords)); - upload_offset += ls::size_bytes(texcoords); + std::ranges::move(indices, std::back_inserter(mesh_indices)); + std::ranges::move(vertices, std::back_inserter(mesh_vertices)); + std::ranges::move(normals, std::back_inserter(mesh_normals)); + std::ranges::move(texcoords, std::back_inserter(mesh_texcoords)); + std::ranges::move(meshlets, std::back_inserter(mesh_meshlets)); + std::ranges::move(meshlet_bounds, std::back_inserter(mesh_meshlet_bounds)); + std::ranges::move(local_triangle_indices, std::back_inserter(mesh_local_triangle_indices)); + std::ranges::move(indirect_vertex_indices, std::back_inserter(mesh_indirect_vertex_indices)); } + } - gpu_mesh.meshlets = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, meshlets.data(), ls::size_bytes(meshlets)); - upload_offset += ls::size_bytes(meshlets); + auto upload_size = 0 // + + ls::size_bytes(mesh_indices) // + + ls::size_bytes(mesh_vertices) // + + ls::size_bytes(mesh_normals) // + + ls::size_bytes(mesh_texcoords) // + + ls::size_bytes(mesh_meshlets) // + + ls::size_bytes(mesh_meshlet_bounds) // + + ls::size_bytes(mesh_local_triangle_indices) // + + ls::size_bytes(mesh_indirect_vertex_indices); + gpu_mesh_buffer = Buffer::create(*impl->device, upload_size, vuk::MemoryUsage::eGPUonly).value(); + auto gpu_mesh_bda = gpu_mesh_buffer.device_address(); + + auto cpu_mesh_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, upload_size); + auto cpu_mesh_ptr = reinterpret_cast(cpu_mesh_buffer->mapped_ptr); + auto upload_offset = 0_u64; + + gpu_mesh.indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_indices.data(), ls::size_bytes(mesh_indices)); + upload_offset += ls::size_bytes(mesh_indices); + + gpu_mesh.vertex_positions = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_vertices.data(), 
ls::size_bytes(mesh_vertices)); + upload_offset += ls::size_bytes(mesh_vertices); + + gpu_mesh.vertex_normals = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_normals.data(), ls::size_bytes(mesh_normals)); + upload_offset += ls::size_bytes(mesh_normals); + + if (!mesh_texcoords.empty()) { + gpu_mesh.texture_coords = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_texcoords.data(), ls::size_bytes(mesh_texcoords)); + upload_offset += ls::size_bytes(mesh_texcoords); + } - gpu_mesh.meshlet_bounds = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, meshlet_bounds.data(), ls::size_bytes(meshlet_bounds)); - upload_offset += ls::size_bytes(meshlet_bounds); + gpu_mesh.meshlets = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_meshlets.data(), ls::size_bytes(mesh_meshlets)); + upload_offset += ls::size_bytes(mesh_meshlets); - gpu_mesh.local_triangle_indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, local_triangle_indices.data(), ls::size_bytes(local_triangle_indices)); - upload_offset += ls::size_bytes(local_triangle_indices); + gpu_mesh.meshlet_bounds = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_meshlet_bounds.data(), ls::size_bytes(mesh_meshlet_bounds)); + upload_offset += ls::size_bytes(mesh_meshlet_bounds); - gpu_mesh.indirect_vertex_indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, indirect_vertex_indices.data(), ls::size_bytes(indirect_vertex_indices)); - upload_offset += ls::size_bytes(indirect_vertex_indices); + gpu_mesh.local_triangle_indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_local_triangle_indices.data(), ls::size_bytes(mesh_local_triangle_indices)); + upload_offset += ls::size_bytes(mesh_local_triangle_indices); - transfer_man.wait_on(std::move(transfer_man.upload_staging(std::move(cpu_mesh_buffer), 
gpu_mesh_buffer))); + gpu_mesh.indirect_vertex_indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_indirect_vertex_indices.data(), ls::size_bytes(mesh_indirect_vertex_indices)); + upload_offset += ls::size_bytes(mesh_indirect_vertex_indices); - primitive.meshlet_count = meshlet_count; - } + transfer_man.wait_on(std::move(transfer_man.upload_staging(std::move(cpu_mesh_buffer), gpu_mesh_buffer))); } return true; @@ -1578,11 +1599,11 @@ auto AssetManager::get_materials_buffer() -> vuk::Value { auto occlusion_image_index = uuid_to_index(material->occlusion_texture); auto flags = GPU::MaterialFlag::None; - flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; - flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; - flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; - flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; - flags |= occlusion_image_index.has_value() ? GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; + // flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; + // flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; + // flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; + // flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; + // flags |= occlusion_image_index.has_value() ? 
GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; // flags |= GPU::MaterialFlag::NormalFlipY; return { diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 9e38944a..95289721 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -263,6 +263,14 @@ public struct MeshletInstance { public u32 meshlet_index = 0; }; +public struct MeshLOD { + public u32 index_offset = 0; + public u32 index_count = 0; + public u32 meshlet_offset = 0; + public u32 meshlet_count = 0; + public f32 error = 0.0; +}; + public struct Mesh { public u32 *indices = nullptr; public f32x3 *vertex_positions = nullptr; @@ -272,6 +280,9 @@ public struct Mesh { public MeshletBounds *meshlet_bounds = nullptr; public u8 *local_triangle_indices = nullptr; public u32 *indirect_vertex_indices = nullptr; + public u32 lod_count = 0; + public u32 _padding = 0; + public MeshLOD lods[MESH_MAX_LODS] = {}; }; public struct Light { diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 1c98d924..bebcd82e 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -162,7 +162,17 @@ struct MeshletInstance { alignas(4) u32 meshlet_index = 0; }; +struct MeshLOD { + alignas(4) u32 index_offset = 0; + alignas(4) u32 index_count = 0; + alignas(4) u32 meshlet_offset = 0; + alignas(4) u32 meshlet_count = 0; + alignas(4) f32 error = 0.0f; +}; + struct Mesh { + constexpr static auto MAX_LODS = 1_sz; + alignas(8) u64 indices = 0; alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; @@ -171,6 +181,9 @@ struct Mesh { alignas(8) u64 meshlet_bounds = 0; alignas(8) u64 local_triangle_indices = 0; alignas(8) u64 indirect_vertex_indices = 0; + alignas(4) u32 lod_count = 0; + alignas(4) u32 padding = 0; // we want to be extra safe here + alignas(4) MeshLOD lods[MAX_LODS] = {}; }; constexpr static u32 HISTOGRAM_THREADS_X = 16; diff --git 
a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 0f9a6bf8..93a3ffb7 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -731,18 +731,17 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { auto gpu_meshlet_instances = std::vector(); for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { + auto mesh_index = static_cast(gpu_meshes.size()); + auto *model = app.asset_man.get_model(rendering_mesh.n0); + const auto &gpu_mesh = model->gpu_meshes[rendering_mesh.n1]; const auto &mesh = model->meshes[rendering_mesh.n1]; + gpu_meshes.push_back(gpu_mesh); // ── INSTANCING ────────────────────────────────────────────────────── for (const auto transform_id : transform_ids) { for (auto primitive_index : mesh.primitive_indices) { const auto &primitive = model->primitives[primitive_index]; - const auto &gpu_mesh = model->gpu_meshes[primitive_index]; - - auto mesh_index = static_cast(gpu_meshes.size()); - gpu_meshes.push_back(gpu_mesh); - for (u32 meshlet_index = 0; meshlet_index < primitive.meshlet_count; meshlet_index++) { auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); meshlet_instance.mesh_index = mesh_index; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 90622226..52f0514d 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -35,6 +35,7 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi .definitions = { { "CULLING_MESHLET_COUNT", std::to_string(Model::MAX_MESHLET_INDICES) }, { "CULLING_TRIANGLE_COUNT", std::to_string(Model::MAX_MESHLET_PRIMITIVES) }, + { "MESH_MAX_LODS", std::to_string(GPU::Mesh::MAX_LODS) }, { "HISTOGRAM_THREADS_X", std::to_string(GPU::HISTOGRAM_THREADS_X) }, { "HISTOGRAM_THREADS_Y", std::to_string(GPU::HISTOGRAM_THREADS_Y) }, }, From 7e14969c728883763cb6401db2a5be8c5bd32027 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: 
Sat, 26 Jul 2025 00:30:05 +0300 Subject: [PATCH 06/16] add lod generation --- Lorr/Engine/Asset/Asset.cc | 263 +++++++++++++--------- Lorr/Engine/Asset/Model.hh | 1 - Lorr/Engine/Resources/shaders/scene.slang | 24 +- Lorr/Engine/Scene/GPUScene.hh | 11 +- Lorr/Engine/Scene/Scene.cc | 21 +- xmake/packages.lua | 2 +- 6 files changed, 197 insertions(+), 125 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 749e615b..51e87467 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -674,8 +674,6 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto *info = static_cast(user_data); if (info->model->meshes.size() <= mesh_index) { info->model->meshes.resize(mesh_index + 1); - info->model->gpu_meshes.resize(mesh_index + 1); - info->model->gpu_mesh_buffers.resize(mesh_index + 1); } auto &mesh = info->model->meshes[mesh_index]; @@ -683,6 +681,9 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto &primitive = info->model->primitives.emplace_back(); auto *material_asset = app.asset_man.get_asset(info->model->materials[material_index]); + info->model->gpu_meshes.emplace_back(); + info->model->gpu_mesh_buffers.emplace_back(); + info->vertex_positions.resize(info->vertex_positions.size() + vertex_count); info->vertex_normals.resize(info->vertex_normals.size() + vertex_count); info->vertex_texcoords.resize(info->vertex_texcoords.size() + vertex_count); @@ -754,71 +755,128 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { // for each model (aka gltf scene): // - for each mesh: - // - - for each lod: - // - - - for each primitive: + // - - for each primitive: + // - - - for each lod: // - - - - generate lods // - - - - optimize and remap geometry // - - - - calculate meshlets and bounds // - for (const auto &[mesh, gpu_mesh, gpu_mesh_buffer] : std::views::zip(model->meshes, model->gpu_meshes, model->gpu_mesh_buffers)) { - auto mesh_indices = std::vector(); - auto mesh_vertices = 
std::vector(); - auto mesh_normals = std::vector(); - auto mesh_texcoords = std::vector(); - auto mesh_meshlets = std::vector(); - auto mesh_meshlet_bounds = std::vector(); - auto mesh_local_triangle_indices = std::vector(); - auto mesh_indirect_vertex_indices = std::vector(); - - for (auto lod_index = 0_sz; lod_index < GPU::Mesh::MAX_LODS; lod_index++) { - for (auto primitive_index : mesh.primitive_indices) { + for (const auto &mesh : model->meshes) { + for (auto primitive_index : mesh.primitive_indices) { + auto &primitive = model->primitives[primitive_index]; + auto &gpu_mesh = model->gpu_meshes[primitive_index]; + auto &gpu_mesh_buffer = model->gpu_mesh_buffers[primitive_index]; + + auto primitive_indices = ls::span(model_indices.data() + primitive.index_offset, primitive.index_count); + auto primitive_vertices = ls::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); + auto primitive_normals = ls::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); + auto primitive_texcoords = ls::span(model_texcoords.data() + primitive.vertex_offset, primitive.vertex_count); + + auto remapped_vertices = std::vector(primitive_vertices.size()); + auto vertex_count = meshopt_optimizeVertexFetchRemap( + remapped_vertices.data(), + primitive_indices.data(), + primitive_indices.size(), + primitive.vertex_count + ); + + auto mesh_vertices = std::vector(vertex_count); + meshopt_remapVertexBuffer( + mesh_vertices.data(), + primitive_vertices.data(), + primitive_vertices.size(), + sizeof(glm::vec3), + remapped_vertices.data() + ); + + auto mesh_normals = std::vector(vertex_count); + meshopt_remapVertexBuffer( + mesh_normals.data(), + primitive_normals.data(), + primitive_normals.size(), + sizeof(glm::vec3), + remapped_vertices.data() + ); + + auto mesh_texcoords = std::vector(); + if (!primitive_texcoords.empty()) { + mesh_texcoords.resize(vertex_count); + meshopt_remapVertexBuffer( + mesh_texcoords.data(), + 
primitive_texcoords.data(), + primitive_texcoords.size(), + sizeof(glm::vec2), + remapped_vertices.data() + ); + } + + auto mesh_indices = std::vector(); + auto mesh_meshlets = std::vector(); + auto mesh_meshlet_bounds = std::vector(); + auto mesh_local_triangle_indices = std::vector(); + auto mesh_indirect_vertex_indices = std::vector(); + + auto last_lod_indices = std::vector(); + for (auto lod_index = 0_sz; lod_index < GPU::Mesh::MAX_LODS; lod_index++) { ZoneNamedN(z, "GPU Meshlet Generation", true); - auto &primitive = model->primitives[primitive_index]; - auto primitive_indices = ls::span(model_indices.data() + primitive.index_offset, primitive.index_count); - auto primitive_vertices = ls::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); - auto primitive_normals = ls::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); - auto primitive_texcoords = ls::span(model_texcoords.data() + primitive.vertex_offset, primitive.vertex_count); - - // clang-format off - auto remapped_vertices = std::vector(primitive_vertices.size()); - auto vertex_count = meshopt_optimizeVertexFetchRemap(remapped_vertices.data(), primitive_indices.data(), primitive_indices.size(), primitive.vertex_count); - - auto vertices = std::vector(vertex_count); - meshopt_remapVertexBuffer(vertices.data(), primitive_vertices.data(), primitive_vertices.size(), sizeof(glm::vec3), remapped_vertices.data()); - - auto normals = std::vector(vertex_count); - meshopt_remapVertexBuffer(normals.data(), primitive_normals.data(), primitive_normals.size(), sizeof(glm::vec3), remapped_vertices.data()); - - auto texcoords = std::vector(); - if (!primitive_texcoords.empty()) { - texcoords.resize(vertex_count); - meshopt_remapVertexBuffer(texcoords.data(), primitive_texcoords.data(), primitive_texcoords.size(), sizeof(glm::vec2), remapped_vertices.data()); + auto &cur_lod = gpu_mesh.lods[gpu_mesh.lod_count++]; + + auto simplified_indices = std::vector(); + if 
(lod_index == 0) { + simplified_indices = std::vector(primitive_indices.begin(), primitive_indices.end()); + } else { + auto lod_index_count = (static_cast(static_cast(last_lod_indices.size()) * 0.65f) / 3_sz) * 3_sz; + simplified_indices.resize(last_lod_indices.size(), 0_u32); + const auto target_error = 1e-1f; + + auto result_error = 0.0f; + auto result_index_count = meshopt_simplify( + simplified_indices.data(), + last_lod_indices.data(), + last_lod_indices.size(), + reinterpret_cast(primitive_vertices.data()), + primitive_vertices.size(), + sizeof(glm::vec3), + lod_index_count, + target_error, + 0, + &result_error + ); + + if (result_index_count == last_lod_indices.size() || result_index_count == 0) { + // Error bound + break; + } + + simplified_indices.resize(result_index_count); } - auto indices = std::vector(primitive_indices.size()); - meshopt_remapIndexBuffer(indices.data(), primitive_indices.data(), primitive.index_count, remapped_vertices.data()); + last_lod_indices = simplified_indices; + + auto indices = std::vector(simplified_indices.size()); + meshopt_remapIndexBuffer(indices.data(), simplified_indices.data(), simplified_indices.size(), remapped_vertices.data()); { - auto optimized_indices = std::vector(primitive_indices.size()); + auto optimized_indices = std::vector(indices.size()); meshopt_optimizeVertexCache(optimized_indices.data(), indices.data(), indices.size(), vertex_count); indices = std::move(optimized_indices); } // Worst case count auto max_meshlet_count = meshopt_buildMeshletsBound(indices.size(), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES); - auto meshlets = std::vector(max_meshlet_count); + auto raw_meshlets = std::vector(max_meshlet_count); auto indirect_vertex_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_INDICES); auto local_triangle_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_PRIMITIVES * 3); auto meshlet_count = meshopt_buildMeshlets( - meshlets.data(), + raw_meshlets.data(), 
indirect_vertex_indices.data(), local_triangle_indices.data(), indices.data(), indices.size(), - reinterpret_cast(vertices.data()), - vertices.size(), + reinterpret_cast(mesh_vertices.data()), + mesh_vertices.size(), sizeof(glm::vec3), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES, @@ -826,94 +884,97 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { ); // Trim meshlets from worst case to current case - meshlets.resize(meshlet_count); - const auto &last_meshlet = meshlets[meshlet_count - 1]; + raw_meshlets.resize(meshlet_count); + auto meshlets = std::vector(meshlet_count); + const auto &last_meshlet = raw_meshlets[meshlet_count - 1]; indirect_vertex_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); local_triangle_indices.resize(last_meshlet.triangle_offset + ((last_meshlet.triangle_count * 3 + 3) & ~3_u32)); auto meshlet_bounds = std::vector(meshlet_count); - for (const auto &[meshlet, meshlet_aabb] : std::views::zip(meshlets, meshlet_bounds)) { + for (const auto &[raw_meshlet, meshlet, meshlet_aabb] : std::views::zip(raw_meshlets, meshlets, meshlet_bounds)) { // AABB computation auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); auto meshlet_bb_max = glm::vec3(std::numeric_limits::lowest()); - for (u32 i = 0; i < meshlet.triangle_count * 3; i++) { + for (u32 i = 0; i < raw_meshlet.triangle_count * 3; i++) { const auto &tri_pos = - vertices[indirect_vertex_indices[meshlet.vertex_offset + local_triangle_indices[meshlet.triangle_offset + i]]]; + mesh_vertices[indirect_vertex_indices[raw_meshlet.vertex_offset + local_triangle_indices[raw_meshlet.triangle_offset + i]]]; meshlet_bb_min = glm::min(meshlet_bb_min, tri_pos); meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); } - meshlet.triangle_offset += mesh_local_triangle_indices.size(); - meshlet.vertex_offset += mesh_indirect_vertex_indices.size(); + meshlet.indirect_vertex_index_offset = raw_meshlet.vertex_offset; + meshlet.local_triangle_index_offset = 
raw_meshlet.triangle_offset; + meshlet.vertex_count = raw_meshlet.vertex_count; + meshlet.triangle_count = raw_meshlet.triangle_count; meshlet_aabb.aabb_min = meshlet_bb_min; meshlet_aabb.aabb_max = meshlet_bb_max; } - primitive.meshlet_count = meshlet_count; + cur_lod.meshlet_offset = mesh_meshlets.size(); + cur_lod.meshlet_count = meshlet_count; + cur_lod.index_offset = mesh_indices.size(); + cur_lod.index_count = indices.size(); std::ranges::move(indices, std::back_inserter(mesh_indices)); - std::ranges::move(vertices, std::back_inserter(mesh_vertices)); - std::ranges::move(normals, std::back_inserter(mesh_normals)); - std::ranges::move(texcoords, std::back_inserter(mesh_texcoords)); std::ranges::move(meshlets, std::back_inserter(mesh_meshlets)); std::ranges::move(meshlet_bounds, std::back_inserter(mesh_meshlet_bounds)); std::ranges::move(local_triangle_indices, std::back_inserter(mesh_local_triangle_indices)); std::ranges::move(indirect_vertex_indices, std::back_inserter(mesh_indirect_vertex_indices)); } - } - auto upload_size = 0 // - + ls::size_bytes(mesh_indices) // - + ls::size_bytes(mesh_vertices) // - + ls::size_bytes(mesh_normals) // - + ls::size_bytes(mesh_texcoords) // - + ls::size_bytes(mesh_meshlets) // - + ls::size_bytes(mesh_meshlet_bounds) // - + ls::size_bytes(mesh_local_triangle_indices) // - + ls::size_bytes(mesh_indirect_vertex_indices); - gpu_mesh_buffer = Buffer::create(*impl->device, upload_size, vuk::MemoryUsage::eGPUonly).value(); - auto gpu_mesh_bda = gpu_mesh_buffer.device_address(); - - auto cpu_mesh_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, upload_size); - auto cpu_mesh_ptr = reinterpret_cast(cpu_mesh_buffer->mapped_ptr); - auto upload_offset = 0_u64; - - gpu_mesh.indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_indices.data(), ls::size_bytes(mesh_indices)); - upload_offset += ls::size_bytes(mesh_indices); - - gpu_mesh.vertex_positions = gpu_mesh_bda + 
upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_vertices.data(), ls::size_bytes(mesh_vertices)); - upload_offset += ls::size_bytes(mesh_vertices); - - gpu_mesh.vertex_normals = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_normals.data(), ls::size_bytes(mesh_normals)); - upload_offset += ls::size_bytes(mesh_normals); - - if (!mesh_texcoords.empty()) { - gpu_mesh.texture_coords = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_texcoords.data(), ls::size_bytes(mesh_texcoords)); - upload_offset += ls::size_bytes(mesh_texcoords); - } + auto upload_size = 0 // + + ls::size_bytes(mesh_indices) // + + ls::size_bytes(mesh_vertices) // + + ls::size_bytes(mesh_normals) // + + ls::size_bytes(mesh_texcoords) // + + ls::size_bytes(mesh_meshlets) // + + ls::size_bytes(mesh_meshlet_bounds) // + + ls::size_bytes(mesh_local_triangle_indices) // + + ls::size_bytes(mesh_indirect_vertex_indices); + gpu_mesh_buffer = Buffer::create(*impl->device, upload_size, vuk::MemoryUsage::eGPUonly).value(); + auto gpu_mesh_bda = gpu_mesh_buffer.device_address(); + + auto cpu_mesh_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, upload_size); + auto cpu_mesh_ptr = reinterpret_cast(cpu_mesh_buffer->mapped_ptr); + auto upload_offset = 0_u64; + + gpu_mesh.indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_indices.data(), ls::size_bytes(mesh_indices)); + upload_offset += ls::size_bytes(mesh_indices); + + gpu_mesh.vertex_positions = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_vertices.data(), ls::size_bytes(mesh_vertices)); + upload_offset += ls::size_bytes(mesh_vertices); + + gpu_mesh.vertex_normals = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_normals.data(), ls::size_bytes(mesh_normals)); + upload_offset += ls::size_bytes(mesh_normals); + + if (!mesh_texcoords.empty()) { + 
gpu_mesh.texture_coords = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_texcoords.data(), ls::size_bytes(mesh_texcoords)); + upload_offset += ls::size_bytes(mesh_texcoords); + } - gpu_mesh.meshlets = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_meshlets.data(), ls::size_bytes(mesh_meshlets)); - upload_offset += ls::size_bytes(mesh_meshlets); + gpu_mesh.meshlets = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_meshlets.data(), ls::size_bytes(mesh_meshlets)); + upload_offset += ls::size_bytes(mesh_meshlets); - gpu_mesh.meshlet_bounds = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_meshlet_bounds.data(), ls::size_bytes(mesh_meshlet_bounds)); - upload_offset += ls::size_bytes(mesh_meshlet_bounds); + gpu_mesh.meshlet_bounds = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_meshlet_bounds.data(), ls::size_bytes(mesh_meshlet_bounds)); + upload_offset += ls::size_bytes(mesh_meshlet_bounds); - gpu_mesh.local_triangle_indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_local_triangle_indices.data(), ls::size_bytes(mesh_local_triangle_indices)); - upload_offset += ls::size_bytes(mesh_local_triangle_indices); + gpu_mesh.local_triangle_indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_local_triangle_indices.data(), ls::size_bytes(mesh_local_triangle_indices)); + upload_offset += ls::size_bytes(mesh_local_triangle_indices); - gpu_mesh.indirect_vertex_indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_indirect_vertex_indices.data(), ls::size_bytes(mesh_indirect_vertex_indices)); - upload_offset += ls::size_bytes(mesh_indirect_vertex_indices); + gpu_mesh.indirect_vertex_indices = gpu_mesh_bda + upload_offset; + std::memcpy(cpu_mesh_ptr + upload_offset, mesh_indirect_vertex_indices.data(), 
ls::size_bytes(mesh_indirect_vertex_indices)); + upload_offset += ls::size_bytes(mesh_indirect_vertex_indices); - transfer_man.wait_on(std::move(transfer_man.upload_staging(std::move(cpu_mesh_buffer), gpu_mesh_buffer))); + transfer_man.wait_on(std::move(transfer_man.upload_staging(std::move(cpu_mesh_buffer), gpu_mesh_buffer))); + } } return true; diff --git a/Lorr/Engine/Asset/Model.hh b/Lorr/Engine/Asset/Model.hh index a7ccf2ab..8c3c1888 100644 --- a/Lorr/Engine/Asset/Model.hh +++ b/Lorr/Engine/Asset/Model.hh @@ -67,7 +67,6 @@ struct Model { struct Primitive { MaterialID material_id = MaterialID::Invalid; - u32 meshlet_count = 0; u32 vertex_count = 0; u32 vertex_offset = 0; u32 index_count = 0; diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 95289721..76e120df 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -190,15 +190,15 @@ public struct Material { }; public struct Meshlet { - public u32 vertex_offset = 0; - public u32 triangle_offset = 0; + public u32 indirect_vertex_index_offset = 0; + public u32 local_triangle_index_offset = 0; public u32 vertex_count = 0; public u32 triangle_count = 0; // Takes a local triange index and returns an index to index buffer. public func index(in Mesh mesh, u32 i) -> u32 { - let local_triangle_index = u32(mesh.local_triangle_indices[this.triangle_offset + i]); - return mesh.indirect_vertex_indices[this.vertex_offset + local_triangle_index]; + let local_triangle_index = u32(mesh.local_triangle_indices[this.local_triangle_index_offset + i]); + return mesh.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_index]; } // Returns position of a vertex. 
@@ -218,14 +218,14 @@ public struct Meshlet { public func indices(in Mesh mesh, u32 i) -> u32x3 { let local_triangle_indices = u32x3( - u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 0]), - u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 1]), - u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 2]), + u32(mesh.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 0]), + u32(mesh.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 1]), + u32(mesh.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 2]), ); - return { mesh.indirect_vertex_indices[this.vertex_offset + local_triangle_indices.x], - mesh.indirect_vertex_indices[this.vertex_offset + local_triangle_indices.y], - mesh.indirect_vertex_indices[this.vertex_offset + local_triangle_indices.z] }; + return { mesh.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.x], + mesh.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.y], + mesh.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.z] }; } public func positions(in Mesh mesh, in u32x3 indices) -> f32x3x3 { @@ -271,6 +271,10 @@ public struct MeshLOD { public f32 error = 0.0; }; +#ifndef MESH_MAX_LODS +#define MESH_MAX_LODS 8 +#endif + public struct Mesh { public u32 *indices = nullptr; public f32x3 *vertex_positions = nullptr; diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index bebcd82e..5f80ebe3 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -150,6 +150,13 @@ struct Material { alignas(4) u32 occlusion_image_index = ~0_u32; }; +struct Meshlet { + alignas(4) u32 indirect_vertex_index_offset = 0; + alignas(4) u32 local_triangle_index_offset = 0; + alignas(4) u32 vertex_count = 0; + alignas(4) u32 triangle_count = 0; +}; + struct MeshletBounds { alignas(4) glm::vec3 aabb_min = {}; alignas(4) glm::vec3 aabb_max = {}; @@ -171,7 
+178,7 @@ struct MeshLOD { }; struct Mesh { - constexpr static auto MAX_LODS = 1_sz; + constexpr static auto MAX_LODS = 8_sz; alignas(8) u64 indices = 0; alignas(8) u64 vertex_positions = 0; @@ -181,8 +188,8 @@ struct Mesh { alignas(8) u64 meshlet_bounds = 0; alignas(8) u64 local_triangle_indices = 0; alignas(8) u64 indirect_vertex_indices = 0; + alignas(4) u32 material_index = 0; alignas(4) u32 lod_count = 0; - alignas(4) u32 padding = 0; // we want to be extra safe here alignas(4) MeshLOD lods[MAX_LODS] = {}; }; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 93a3ffb7..0a06757c 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -731,23 +731,24 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { auto gpu_meshlet_instances = std::vector(); for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { - auto mesh_index = static_cast(gpu_meshes.size()); - auto *model = app.asset_man.get_model(rendering_mesh.n0); - const auto &gpu_mesh = model->gpu_meshes[rendering_mesh.n1]; const auto &mesh = model->meshes[rendering_mesh.n1]; - gpu_meshes.push_back(gpu_mesh); // ── INSTANCING ────────────────────────────────────────────────────── - for (const auto transform_id : transform_ids) { - for (auto primitive_index : mesh.primitive_indices) { - const auto &primitive = model->primitives[primitive_index]; - for (u32 meshlet_index = 0; meshlet_index < primitive.meshlet_count; meshlet_index++) { + for (auto primitive_index : mesh.primitive_indices) { + const auto &gpu_mesh = model->gpu_meshes[primitive_index]; + const auto &lod = gpu_mesh.lods[gpu_mesh.lod_count - 1]; + + auto mesh_index = static_cast(gpu_meshes.size()); + gpu_meshes.emplace_back(gpu_mesh); + + for (const auto transform_id : transform_ids) { + for (u32 meshlet_index = 0; meshlet_index < lod.meshlet_count; meshlet_index++) { auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); meshlet_instance.mesh_index = mesh_index; 
meshlet_instance.transform_index = SlotMap_decode_id(transform_id).index; - meshlet_instance.material_index = SlotMap_decode_id(primitive.material_id).index; - meshlet_instance.meshlet_index = meshlet_index; + meshlet_instance.meshlet_index = lod.meshlet_offset + meshlet_index; + meshlet_instance.material_index = gpu_mesh.material_index; } } } diff --git a/xmake/packages.lua b/xmake/packages.lua index a9732bcb..22c900e3 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -59,7 +59,7 @@ add_requires("vuk 2025.06.15", { configs = { disable_exceptions = true, }, debug = is_mode("debug") }) -add_requires("meshoptimizer v0.22") +add_requires("meshoptimizer v0.24", { debug = true }) add_requires("ktx v4.4.0") add_requires("svector v1.0.3") From f478c6ea07bd50dbd5ae25cdab520eff12988883 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 27 Jul 2025 21:41:29 +0300 Subject: [PATCH 07/16] working lods --- Lorr/Engine/Asset/Asset.cc | 172 ++++++++++-------- .../shaders/passes/cull_meshlets.slang | 3 +- .../shaders/passes/cull_triangles.slang | 12 +- .../shaders/passes/visbuffer_decode.slang | 7 +- .../shaders/passes/visbuffer_encode.slang | 9 +- Lorr/Engine/Resources/shaders/scene.slang | 37 ++-- Lorr/Engine/Scene/GPUScene.hh | 17 +- Lorr/Engine/Scene/Scene.cc | 7 +- xmake/packages.lua | 2 +- 9 files changed, 144 insertions(+), 122 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 51e87467..c0a4cb07 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -17,6 +17,7 @@ #include "Engine/Scene/ECSModule/Core.hh" +#include #include #include @@ -767,6 +768,9 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto &gpu_mesh = model->gpu_meshes[primitive_index]; auto &gpu_mesh_buffer = model->gpu_mesh_buffers[primitive_index]; + gpu_mesh.material_index = SlotMap_decode_id(primitive.material_id).index; + + // ── Geometry remapping 
────────────────────────────────────────────── auto primitive_indices = ls::span(model_indices.data() + primitive.index_offset, primitive.index_count); auto primitive_vertices = ls::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); auto primitive_normals = ls::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); @@ -798,7 +802,7 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { remapped_vertices.data() ); - auto mesh_texcoords = std::vector(); + auto mesh_texcoords = std::vector(); if (!primitive_texcoords.empty()) { mesh_texcoords.resize(vertex_count); meshopt_remapVertexBuffer( @@ -810,33 +814,39 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { ); } - auto mesh_indices = std::vector(); - auto mesh_meshlets = std::vector(); - auto mesh_meshlet_bounds = std::vector(); - auto mesh_local_triangle_indices = std::vector(); - auto mesh_indirect_vertex_indices = std::vector(); + auto mesh_indices = std::vector(primitive.index_count); + meshopt_remapIndexBuffer(mesh_indices.data(), primitive_indices.data(), primitive_indices.size(), remapped_vertices.data()); + + // ── LOD generation ────────────────────────────────────────────────── + + const auto mesh_upload_size = 0 // + + ls::size_bytes(mesh_vertices) // + + ls::size_bytes(mesh_normals) // + + ls::size_bytes(mesh_texcoords); + auto upload_size = mesh_upload_size; + ls::pair, u64> lod_cpu_buffers[GPU::Mesh::MAX_LODS] = {}; auto last_lod_indices = std::vector(); for (auto lod_index = 0_sz; lod_index < GPU::Mesh::MAX_LODS; lod_index++) { ZoneNamedN(z, "GPU Meshlet Generation", true); - auto &cur_lod = gpu_mesh.lods[gpu_mesh.lod_count++]; + auto &cur_lod = gpu_mesh.lods[lod_index]; auto simplified_indices = std::vector(); if (lod_index == 0) { - simplified_indices = std::vector(primitive_indices.begin(), primitive_indices.end()); + simplified_indices = std::vector(mesh_indices.begin(), mesh_indices.end()); } else { - auto lod_index_count = 
(static_cast(static_cast(last_lod_indices.size()) * 0.65f) / 3_sz) * 3_sz; + auto lod_index_count = ((last_lod_indices.size() + 5_sz) / 6_sz) * 3_sz; simplified_indices.resize(last_lod_indices.size(), 0_u32); - const auto target_error = 1e-1f; + const auto target_error = std::numeric_limits::max(); auto result_error = 0.0f; auto result_index_count = meshopt_simplify( simplified_indices.data(), last_lod_indices.data(), last_lod_indices.size(), - reinterpret_cast(primitive_vertices.data()), - primitive_vertices.size(), + reinterpret_cast(mesh_vertices.data()), + mesh_vertices.size(), sizeof(glm::vec3), lod_index_count, target_error, @@ -844,7 +854,9 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { &result_error ); - if (result_index_count == last_lod_indices.size() || result_index_count == 0) { + cur_lod.error = result_error; + + if (result_index_count > (lod_index_count + lod_index_count / 2) || result_error > 0.5) { // Error bound break; } @@ -852,19 +864,14 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { simplified_indices.resize(result_index_count); } + gpu_mesh.lod_count += 1; last_lod_indices = simplified_indices; - auto indices = std::vector(simplified_indices.size()); - meshopt_remapIndexBuffer(indices.data(), simplified_indices.data(), simplified_indices.size(), remapped_vertices.data()); - - { - auto optimized_indices = std::vector(indices.size()); - meshopt_optimizeVertexCache(optimized_indices.data(), indices.data(), indices.size(), vertex_count); - indices = std::move(optimized_indices); - } + meshopt_optimizeVertexCache(simplified_indices.data(), simplified_indices.data(), simplified_indices.size(), vertex_count); // Worst case count - auto max_meshlet_count = meshopt_buildMeshletsBound(indices.size(), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES); + auto max_meshlet_count = + meshopt_buildMeshletsBound(simplified_indices.size(), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES); auto raw_meshlets = 
std::vector(max_meshlet_count); auto indirect_vertex_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_INDICES); auto local_triangle_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_PRIMITIVES * 3); @@ -873,9 +880,9 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { raw_meshlets.data(), indirect_vertex_indices.data(), local_triangle_indices.data(), - indices.data(), - indices.size(), - reinterpret_cast(mesh_vertices.data()), + simplified_indices.data(), + simplified_indices.size(), + reinterpret_cast(mesh_vertices.data()), mesh_vertices.size(), sizeof(glm::vec3), Model::MAX_MESHLET_INDICES, @@ -896,8 +903,8 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); auto meshlet_bb_max = glm::vec3(std::numeric_limits::lowest()); for (u32 i = 0; i < raw_meshlet.triangle_count * 3; i++) { - const auto &tri_pos = - mesh_vertices[indirect_vertex_indices[raw_meshlet.vertex_offset + local_triangle_indices[raw_meshlet.triangle_offset + i]]]; + const auto &tri_pos = mesh_vertices + [indirect_vertex_indices[raw_meshlet.vertex_offset + local_triangle_indices[raw_meshlet.triangle_offset + i]]]; meshlet_bb_min = glm::min(meshlet_bb_min, tri_pos); meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); } @@ -911,69 +918,86 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { meshlet_aabb.aabb_max = meshlet_bb_max; } - cur_lod.meshlet_offset = mesh_meshlets.size(); + auto lod_upload_size = 0 // + + ls::size_bytes(simplified_indices) // + + ls::size_bytes(meshlets) // + + ls::size_bytes(meshlet_bounds) // + + ls::size_bytes(local_triangle_indices) // + + ls::size_bytes(indirect_vertex_indices); + auto cpu_lod_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, lod_upload_size); + auto cpu_lod_ptr = reinterpret_cast(cpu_lod_buffer->mapped_ptr); + + auto upload_offset = 0_u64; + cur_lod.indices = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, 
simplified_indices.data(), ls::size_bytes(simplified_indices)); + upload_offset += ls::size_bytes(simplified_indices); + + cur_lod.meshlets = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, meshlets.data(), ls::size_bytes(meshlets)); + upload_offset += ls::size_bytes(meshlets); + + cur_lod.meshlet_bounds = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, meshlet_bounds.data(), ls::size_bytes(meshlet_bounds)); + upload_offset += ls::size_bytes(meshlet_bounds); + + cur_lod.local_triangle_indices = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, local_triangle_indices.data(), ls::size_bytes(local_triangle_indices)); + upload_offset += ls::size_bytes(local_triangle_indices); + + cur_lod.indirect_vertex_indices = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, indirect_vertex_indices.data(), ls::size_bytes(indirect_vertex_indices)); + upload_offset += ls::size_bytes(indirect_vertex_indices); + cur_lod.meshlet_count = meshlet_count; - cur_lod.index_offset = mesh_indices.size(); - cur_lod.index_count = indices.size(); - - std::ranges::move(indices, std::back_inserter(mesh_indices)); - std::ranges::move(meshlets, std::back_inserter(mesh_meshlets)); - std::ranges::move(meshlet_bounds, std::back_inserter(mesh_meshlet_bounds)); - std::ranges::move(local_triangle_indices, std::back_inserter(mesh_local_triangle_indices)); - std::ranges::move(indirect_vertex_indices, std::back_inserter(mesh_indirect_vertex_indices)); + + lod_cpu_buffers[lod_index] = ls::pair(cpu_lod_buffer, lod_upload_size); + upload_size += lod_upload_size; } - auto upload_size = 0 // - + ls::size_bytes(mesh_indices) // - + ls::size_bytes(mesh_vertices) // - + ls::size_bytes(mesh_normals) // - + ls::size_bytes(mesh_texcoords) // - + ls::size_bytes(mesh_meshlets) // - + ls::size_bytes(mesh_meshlet_bounds) // - + ls::size_bytes(mesh_local_triangle_indices) // - + ls::size_bytes(mesh_indirect_vertex_indices); + auto mesh_upload_offset = 0_u64; gpu_mesh_buffer = 
Buffer::create(*impl->device, upload_size, vuk::MemoryUsage::eGPUonly).value(); - auto gpu_mesh_bda = gpu_mesh_buffer.device_address(); - auto cpu_mesh_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, upload_size); + // Mesh first + auto cpu_mesh_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, mesh_upload_size); auto cpu_mesh_ptr = reinterpret_cast(cpu_mesh_buffer->mapped_ptr); - auto upload_offset = 0_u64; - - gpu_mesh.indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_indices.data(), ls::size_bytes(mesh_indices)); - upload_offset += ls::size_bytes(mesh_indices); - gpu_mesh.vertex_positions = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_vertices.data(), ls::size_bytes(mesh_vertices)); - upload_offset += ls::size_bytes(mesh_vertices); + auto gpu_mesh_bda = gpu_mesh_buffer.device_address(); + gpu_mesh.vertex_positions = gpu_mesh_bda + mesh_upload_offset; + std::memcpy(cpu_mesh_ptr + mesh_upload_offset, mesh_vertices.data(), ls::size_bytes(mesh_vertices)); + mesh_upload_offset += ls::size_bytes(mesh_vertices); - gpu_mesh.vertex_normals = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_normals.data(), ls::size_bytes(mesh_normals)); - upload_offset += ls::size_bytes(mesh_normals); + gpu_mesh.vertex_normals = gpu_mesh_bda + mesh_upload_offset; + std::memcpy(cpu_mesh_ptr + mesh_upload_offset, mesh_normals.data(), ls::size_bytes(mesh_normals)); + mesh_upload_offset += ls::size_bytes(mesh_normals); if (!mesh_texcoords.empty()) { - gpu_mesh.texture_coords = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_texcoords.data(), ls::size_bytes(mesh_texcoords)); - upload_offset += ls::size_bytes(mesh_texcoords); + gpu_mesh.texture_coords = gpu_mesh_bda + mesh_upload_offset; + std::memcpy(cpu_mesh_ptr + mesh_upload_offset, mesh_texcoords.data(), ls::size_bytes(mesh_texcoords)); + mesh_upload_offset += 
ls::size_bytes(mesh_texcoords); } - gpu_mesh.meshlets = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_meshlets.data(), ls::size_bytes(mesh_meshlets)); - upload_offset += ls::size_bytes(mesh_meshlets); + // ignore spilling out buffer size by alignment + auto gpu_mesh_buffer_val = gpu_mesh_buffer.discard(*impl->device, "gpu mesh buffer", 0, mesh_upload_size); + gpu_mesh_buffer_val = transfer_man.upload_staging(std::move(cpu_mesh_buffer), std::move(gpu_mesh_buffer_val)); + transfer_man.wait_on(std::move(gpu_mesh_buffer_val)); - gpu_mesh.meshlet_bounds = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_meshlet_bounds.data(), ls::size_bytes(mesh_meshlet_bounds)); - upload_offset += ls::size_bytes(mesh_meshlet_bounds); + for (auto lod_index = 0_sz; lod_index < gpu_mesh.lod_count; lod_index++) { + auto &[lod_cpu_buffer, lod_upload_size] = lod_cpu_buffers[lod_index]; + auto &lod = gpu_mesh.lods[lod_index]; - gpu_mesh.local_triangle_indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_local_triangle_indices.data(), ls::size_bytes(mesh_local_triangle_indices)); - upload_offset += ls::size_bytes(mesh_local_triangle_indices); + lod.indices += gpu_mesh_bda + mesh_upload_offset; + lod.meshlets += gpu_mesh_bda + mesh_upload_offset; + lod.meshlet_bounds += gpu_mesh_bda + mesh_upload_offset; + lod.local_triangle_indices += gpu_mesh_bda + mesh_upload_offset; + lod.indirect_vertex_indices += gpu_mesh_bda + mesh_upload_offset; - gpu_mesh.indirect_vertex_indices = gpu_mesh_bda + upload_offset; - std::memcpy(cpu_mesh_ptr + upload_offset, mesh_indirect_vertex_indices.data(), ls::size_bytes(mesh_indirect_vertex_indices)); - upload_offset += ls::size_bytes(mesh_indirect_vertex_indices); + // auto cpu_lod_subrange = lod_cpu_buffer.subrange(mesh_upload_offset, lod_upload_size); + auto gpu_lod_subrange = gpu_mesh_buffer.discard(*impl->device, "gpu mesh buffer", mesh_upload_offset, 
lod_upload_size); + gpu_lod_subrange = transfer_man.upload_staging(std::move(lod_cpu_buffer), std::move(gpu_lod_subrange)); + transfer_man.wait_on(std::move(gpu_lod_subrange)); - transfer_man.wait_on(std::move(transfer_man.upload_staging(std::move(cpu_mesh_buffer), gpu_mesh_buffer))); + mesh_upload_offset += lod_upload_size; + } } } diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index 346b99ea..bee2218b 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -57,7 +57,8 @@ func cs_main( let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; let mesh = params.meshes[meshlet_instance.mesh_index]; - let bounds = mesh.meshlet_bounds[meshlet_instance.meshlet_index]; + let mesh_lod = mesh.lods[meshlet_instance.lod_index]; + let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; let transform = params.transforms[meshlet_instance.transform_index]; let aabb_min = bounds.aabb_min; diff --git a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang index 5efd8b23..14372d60 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang @@ -71,10 +71,7 @@ func CullSmallPrimitive(f32x2x3 vertices, f32x2 viewportExtent) -> bool { ); } -func test_triangle(in Mesh mesh, in Meshlet meshlet, in f32x2 resolution, CullFlags cull_flags, u32 triangle_index) -> bool { - let indices = meshlet.indices(mesh, triangle_index); - let positions = meshlet.positions(mesh, indices); - +func test_triangle(in f32x3x3 positions, in f32x2 resolution, CullFlags cull_flags, u32 triangle_index) -> bool { let clip_pos_0 = mul(model_view_proj_shared, f32x4(positions[0], 1.0)); let clip_pos_1 = mul(model_view_proj_shared, f32x4(positions[1], 1.0)); let clip_pos_2 = 
mul(model_view_proj_shared, f32x4(positions[2], 1.0)); @@ -121,7 +118,8 @@ func cs_main( let meshlet_instance_index = params.visible_meshlet_instances_indices[visible_meshlet_index]; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; let mesh = params.meshes[meshlet_instance.mesh_index]; - let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; + let mesh_lod = mesh.lods[meshlet_instance.lod_index]; + let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; if (local_index == 0) { triangles_passed_shared = 0; @@ -134,7 +132,9 @@ func cs_main( var triangle_passed = false; var active_triangle_index = 0; if (local_index < meshlet.triangle_count) { - triangle_passed = test_triangle(mesh, meshlet, params.camera.resolution, cull_flags, local_index); + let indices = meshlet.indices(mesh_lod, local_index); + let positions = meshlet.positions(mesh, indices); + triangle_passed = test_triangle(positions, params.camera.resolution, cull_flags, local_index); if (triangle_passed) { active_triangle_index = std::atomic_add(triangles_passed_shared, 1, std::memory_order_relaxed); } diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index dc553bf7..587ecaec 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -96,11 +96,12 @@ func fs_main(VertexOutput input) -> FragmentOutput { let meshlet_instance_index = vis.meshlet_instance_index; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; let mesh = params.meshes[meshlet_instance.mesh_index]; - let material = params.materials[meshlet_instance.material_index]; + let material = params.materials[mesh.material_index]; let transform = params.transforms[meshlet_instance.transform_index]; - let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; + let mesh_lod = mesh.lods[meshlet_instance.lod_index]; + let meshlet = 
mesh_lod.meshlets[meshlet_instance.meshlet_index]; - let indices = meshlet.indices(mesh, vis.triangle_index); + let indices = meshlet.indices(mesh_lod, vis.triangle_index); let positions = meshlet.positions(mesh, indices); let normals = meshlet.normals(mesh, indices); let tex_coords = meshlet.tex_coords(mesh, indices); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index 28f7af09..3f184d0d 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -28,11 +28,12 @@ struct VertexOutput { func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { let vis = VisBufferData(vertex_index); let meshlet_instance = params.meshlet_instances[vis.meshlet_instance_index]; - let mesh = params.meshes[meshlet_instance.mesh_index]; let transform = params.transforms[meshlet_instance.transform_index]; - let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; + let mesh = params.meshes[meshlet_instance.mesh_index]; + let mesh_lod = mesh.lods[meshlet_instance.lod_index]; + let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; - let index = meshlet.index(mesh, vis.triangle_index); + let index = meshlet.index(mesh_lod, vis.triangle_index); let vertex_pos = meshlet.position(mesh, index); let tex_coord = meshlet.tex_coord(mesh, index); let world_pos = transform.to_world_position(vertex_pos); @@ -44,7 +45,7 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { output.tex_coord = tex_coord; output.meshlet_instance_index = vis.meshlet_instance_index; output.triangle_index = vis.triangle_index / 3; - output.material_index = meshlet_instance.material_index; + output.material_index = mesh.material_index; return output; } diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 76e120df..a1e34c79 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang 
+++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -196,9 +196,9 @@ public struct Meshlet { public u32 triangle_count = 0; // Takes a local triange index and returns an index to index buffer. - public func index(in Mesh mesh, u32 i) -> u32 { - let local_triangle_index = u32(mesh.local_triangle_indices[this.local_triangle_index_offset + i]); - return mesh.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_index]; + public func index(in MeshLOD mesh_lod, u32 i) -> u32 { + let local_triangle_index = u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i]); + return mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_index]; } // Returns position of a vertex. @@ -216,16 +216,16 @@ public struct Meshlet { // ---------------------------------------------------------- - public func indices(in Mesh mesh, u32 i) -> u32x3 { + public func indices(in MeshLOD mesh_lod, u32 i) -> u32x3 { let local_triangle_indices = u32x3( - u32(mesh.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 0]), - u32(mesh.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 1]), - u32(mesh.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 2]), + u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 0]), + u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 1]), + u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 2]), ); - return { mesh.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.x], - mesh.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.y], - mesh.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.z] }; + return { mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.x], + mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + 
local_triangle_indices.y], + mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.z] }; } public func positions(in Mesh mesh, in u32x3 indices) -> f32x3x3 { @@ -258,15 +258,17 @@ public struct MeshletBounds { public struct MeshletInstance { public u32 mesh_index = 0; + public u32 lod_index = 0; public u32 transform_index = 0; - public u32 material_index = 0; public u32 meshlet_index = 0; }; public struct MeshLOD { - public u32 index_offset = 0; - public u32 index_count = 0; - public u32 meshlet_offset = 0; + public u32 *indices = nullptr; + public Meshlet *meshlets = nullptr; + public MeshletBounds *meshlet_bounds = nullptr; + public u8 *local_triangle_indices = nullptr; + public u32 *indirect_vertex_indices = nullptr; public u32 meshlet_count = 0; public f32 error = 0.0; }; @@ -276,16 +278,11 @@ public struct MeshLOD { #endif public struct Mesh { - public u32 *indices = nullptr; public f32x3 *vertex_positions = nullptr; public f32x3 *vertex_normals = nullptr; public f32x2 *texture_coords = nullptr; - public Meshlet *meshlets = nullptr; - public MeshletBounds *meshlet_bounds = nullptr; - public u8 *local_triangle_indices = nullptr; - public u32 *indirect_vertex_indices = nullptr; + public u32 material_index = 0; public u32 lod_count = 0; - public u32 _padding = 0; public MeshLOD lods[MESH_MAX_LODS] = {}; }; diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 5f80ebe3..4dad03f5 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -164,15 +164,17 @@ struct MeshletBounds { struct MeshletInstance { alignas(4) u32 mesh_index = 0; + alignas(4) u32 lod_index = 0; alignas(4) u32 transform_index = 0; - alignas(4) u32 material_index = 0; alignas(4) u32 meshlet_index = 0; }; struct MeshLOD { - alignas(4) u32 index_offset = 0; - alignas(4) u32 index_count = 0; - alignas(4) u32 meshlet_offset = 0; + alignas(8) u64 indices = 0; + alignas(8) u64 meshlets = 0; + alignas(8) u64 
meshlet_bounds = 0; + alignas(8) u64 local_triangle_indices = 0; + alignas(8) u64 indirect_vertex_indices = 0; alignas(4) u32 meshlet_count = 0; alignas(4) f32 error = 0.0f; }; @@ -180,17 +182,12 @@ struct MeshLOD { struct Mesh { constexpr static auto MAX_LODS = 8_sz; - alignas(8) u64 indices = 0; alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; alignas(8) u64 texture_coords = 0; - alignas(8) u64 meshlets = 0; - alignas(8) u64 meshlet_bounds = 0; - alignas(8) u64 local_triangle_indices = 0; - alignas(8) u64 indirect_vertex_indices = 0; alignas(4) u32 material_index = 0; alignas(4) u32 lod_count = 0; - alignas(4) MeshLOD lods[MAX_LODS] = {}; + alignas(8) MeshLOD lods[MAX_LODS] = {}; }; constexpr static u32 HISTOGRAM_THREADS_X = 16; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 0a06757c..ec771644 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -737,7 +737,8 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { // ── INSTANCING ────────────────────────────────────────────────────── for (auto primitive_index : mesh.primitive_indices) { const auto &gpu_mesh = model->gpu_meshes[primitive_index]; - const auto &lod = gpu_mesh.lods[gpu_mesh.lod_count - 1]; + auto lod_index = gpu_mesh.lod_count - 1; + const auto &lod = gpu_mesh.lods[lod_index]; auto mesh_index = static_cast(gpu_meshes.size()); gpu_meshes.emplace_back(gpu_mesh); @@ -746,9 +747,9 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { for (u32 meshlet_index = 0; meshlet_index < lod.meshlet_count; meshlet_index++) { auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); meshlet_instance.mesh_index = mesh_index; + meshlet_instance.lod_index = lod_index; meshlet_instance.transform_index = SlotMap_decode_id(transform_id).index; - meshlet_instance.meshlet_index = lod.meshlet_offset + meshlet_index; - meshlet_instance.material_index = gpu_mesh.material_index; + meshlet_instance.meshlet_index = meshlet_index; } } } 
diff --git a/xmake/packages.lua b/xmake/packages.lua index 22c900e3..cc0f3e12 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -54,7 +54,7 @@ add_requires("flecs v4.0.4") add_requires("libsdl3") add_requires("shader-slang v2025.12.1") -add_requires("vuk 2025.06.15", { configs = { +add_requires("vuk 2025.07.09", { configs = { debug_allocations = false, disable_exceptions = true, }, debug = is_mode("debug") }) From 59bc83affd922e9759fa575f289b374738ea5fc7 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 27 Jul 2025 23:34:51 +0300 Subject: [PATCH 08/16] add normal lod bias --- Lorr/Engine/Asset/Asset.cc | 18 ++++++++++++------ Lorr/Engine/Scene/Scene.cc | 6 +++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index c0a4cb07..8827a890 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -836,27 +836,33 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { if (lod_index == 0) { simplified_indices = std::vector(mesh_indices.begin(), mesh_indices.end()); } else { + const auto &last_lod = gpu_mesh.lods[lod_index - 1]; auto lod_index_count = ((last_lod_indices.size() + 5_sz) / 6_sz) * 3_sz; simplified_indices.resize(last_lod_indices.size(), 0_u32); - const auto target_error = std::numeric_limits::max(); + constexpr auto TARGET_ERROR = std::numeric_limits::max(); + constexpr f32 NORMAL_WEIGHTS[] = { 1.0f, 1.0f, 1.0f }; auto result_error = 0.0f; - auto result_index_count = meshopt_simplify( + auto result_index_count = meshopt_simplifyWithAttributes( simplified_indices.data(), last_lod_indices.data(), last_lod_indices.size(), reinterpret_cast(mesh_vertices.data()), mesh_vertices.size(), sizeof(glm::vec3), + reinterpret_cast(mesh_normals.data()), + sizeof(glm::vec3), + NORMAL_WEIGHTS, + ls::count_of(NORMAL_WEIGHTS), + nullptr, lod_index_count, - target_error, + TARGET_ERROR, 0, &result_error ); - cur_lod.error = result_error; 
- - if (result_index_count > (lod_index_count + lod_index_count / 2) || result_error > 0.5) { + cur_lod.error = last_lod.error + result_error; + if (result_index_count > (lod_index_count + lod_index_count / 2) || result_error > 0.5 || result_index_count < 6) { // Error bound break; } diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index ec771644..7021cf74 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -737,13 +737,13 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { // ── INSTANCING ────────────────────────────────────────────────────── for (auto primitive_index : mesh.primitive_indices) { const auto &gpu_mesh = model->gpu_meshes[primitive_index]; - auto lod_index = gpu_mesh.lod_count - 1; - const auto &lod = gpu_mesh.lods[lod_index]; - auto mesh_index = static_cast(gpu_meshes.size()); gpu_meshes.emplace_back(gpu_mesh); for (const auto transform_id : transform_ids) { + auto lod_index = gpu_mesh.lod_count - 1; + const auto &lod = gpu_mesh.lods[lod_index]; + for (u32 meshlet_index = 0; meshlet_index < lod.meshlet_count; meshlet_index++) { auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); meshlet_instance.mesh_index = mesh_index; From 204a0f42e47c2bdebda56ef11ccf5f072cbc9e7f Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Mon, 28 Jul 2025 14:39:02 +0300 Subject: [PATCH 09/16] add mesh instances --- Lorr/Engine/Asset/Asset.cc | 19 +- Lorr/Engine/Resources/shaders/cull.slang | 23 ++ .../shaders/passes/cull_meshes.slang | 61 +++++ .../shaders/passes/cull_meshlets.slang | 35 +-- .../shaders/passes/cull_triangles.slang | 11 +- .../shaders/passes/editor_mousepick.slang | 6 +- .../shaders/passes/visbuffer_decode.slang | 11 +- .../shaders/passes/visbuffer_encode.slang | 8 +- Lorr/Engine/Resources/shaders/scene.slang | 12 +- Lorr/Engine/Scene/GPUScene.hh | 26 ++- Lorr/Engine/Scene/Scene.cc | 17 +- Lorr/Engine/Scene/SceneRenderer.cc | 214 +++++++++++------- 
Lorr/Engine/Scene/SceneRenderer.hh | 7 +- 13 files changed, 298 insertions(+), 152 deletions(-) create mode 100644 Lorr/Engine/Resources/shaders/passes/cull_meshes.slang diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 8827a890..303b4297 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -768,8 +768,6 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto &gpu_mesh = model->gpu_meshes[primitive_index]; auto &gpu_mesh_buffer = model->gpu_mesh_buffers[primitive_index]; - gpu_mesh.material_index = SlotMap_decode_id(primitive.material_id).index; - // ── Geometry remapping ────────────────────────────────────────────── auto primitive_indices = ls::span(model_indices.data() + primitive.index_offset, primitive.index_count); auto primitive_vertices = ls::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); @@ -903,7 +901,7 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { indirect_vertex_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); local_triangle_indices.resize(last_meshlet.triangle_offset + ((last_meshlet.triangle_count * 3 + 3) & ~3_u32)); - auto meshlet_bounds = std::vector(meshlet_count); + auto meshlet_bounds = std::vector(meshlet_count); for (const auto &[raw_meshlet, meshlet, meshlet_aabb] : std::views::zip(raw_meshlets, meshlets, meshlet_bounds)) { // AABB computation auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); @@ -922,6 +920,9 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { meshlet_aabb.aabb_min = meshlet_bb_min; meshlet_aabb.aabb_max = meshlet_bb_max; + + gpu_mesh.bounds.aabb_max = glm::max(gpu_mesh.bounds.aabb_max, meshlet_bb_max); + gpu_mesh.bounds.aabb_min = glm::min(gpu_mesh.bounds.aabb_min, meshlet_bb_min); /* min corner takes the component-wise minimum so the mesh AABB encloses every meshlet AABB */ } auto lod_upload_size = 0 // @@ -983,12 +984,13 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { } // ignore spilling out buffer size by alignment - auto gpu_mesh_buffer_val = 
gpu_mesh_buffer.discard(*impl->device, "gpu mesh buffer", 0, mesh_upload_size); - gpu_mesh_buffer_val = transfer_man.upload_staging(std::move(cpu_mesh_buffer), std::move(gpu_mesh_buffer_val)); - transfer_man.wait_on(std::move(gpu_mesh_buffer_val)); + auto gpu_mesh_buffer_handle = impl->device->buffer(gpu_mesh_buffer.id()); + auto gpu_mesh_subrange = vuk::discard_buf("mesh", gpu_mesh_buffer_handle->subrange(0, mesh_upload_size)); + gpu_mesh_subrange = transfer_man.upload_staging(std::move(cpu_mesh_buffer), std::move(gpu_mesh_subrange)); + transfer_man.wait_on(std::move(gpu_mesh_subrange)); for (auto lod_index = 0_sz; lod_index < gpu_mesh.lod_count; lod_index++) { - auto &[lod_cpu_buffer, lod_upload_size] = lod_cpu_buffers[lod_index]; + auto &&[lod_cpu_buffer, lod_upload_size] = lod_cpu_buffers[lod_index]; auto &lod = gpu_mesh.lods[lod_index]; lod.indices += gpu_mesh_bda + mesh_upload_offset; @@ -997,8 +999,7 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { lod.local_triangle_indices += gpu_mesh_bda + mesh_upload_offset; lod.indirect_vertex_indices += gpu_mesh_bda + mesh_upload_offset; - // auto cpu_lod_subrange = lod_cpu_buffer.subrange(mesh_upload_offset, lod_upload_size); - auto gpu_lod_subrange = gpu_mesh_buffer.discard(*impl->device, "gpu mesh buffer", mesh_upload_offset, lod_upload_size); + auto gpu_lod_subrange = vuk::discard_buf("mesh lod subrange", gpu_mesh_buffer_handle->subrange(mesh_upload_offset, lod_upload_size)); gpu_lod_subrange = transfer_man.upload_staging(std::move(lod_cpu_buffer), std::move(gpu_lod_subrange)); transfer_man.wait_on(std::move(gpu_lod_subrange)); diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index d29c3eed..7bfd823c 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -51,6 +51,29 @@ public func project_aabb(f32x4x4 mvp, f32 near, f32x3 aabb_min, f32x3 aabb_exten return ret; } + +public func test_frustum(in f32x4x4 mvp, 
in f32x3 aabb_center, in f32x3 aabb_extent) -> bool { + f32x4 planes[] = { + normalize_plane(mvp[3] + mvp[0]), + normalize_plane(mvp[3] - mvp[0]), + normalize_plane(mvp[3] + mvp[1]), + normalize_plane(mvp[3] - mvp[1]), + normalize_plane(mvp[2]) + }; + + let aabb_half_extent = aabb_extent * 0.5; + [[unroll]] + for (uint i = 0; i < planes.getCount(); i++) { + let flip = asuint(planes[i].xyz) & 0x80000000; + let sign_flipped = asfloat(asuint(aabb_half_extent) ^ flip); + if (dot(aabb_center + sign_flipped, planes[i].xyz) <= -planes[i].w) { + return false; + } + } + + return true; +} + public func test_occlusion( in ScreenAabb screen_aabb, in Image2D hiz_image, diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang new file mode 100644 index 00000000..05b2b4e0 --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -0,0 +1,61 @@ +import std; +import gpu; +import scene; +import cull; +import debug_drawer; + +struct ShaderParameters { + RWStructuredBuffer cull_meshlets_cmd; + ConstantBuffer camera; + RWStructuredBuffer visible_mesh_instances_indices; + StructuredBuffer mesh_instance; + StructuredBuffer meshes; + StructuredBuffer transforms; + RWStructuredBuffer debug_drawer; +}; + +#ifndef CULLING_MESHES_COUNT + #define CULLING_MESHES_COUNT 64 +#endif + +[[shader("compute")]] +[[numthreads(CULLING_MESHES_COUNT, 1, 1)]] +func cs_main( + uint3 thread_id : SV_DispatchThreadID, + uniform ParameterBlock params, + uniform u32 mesh_instances_count, + uniform CullFlags cull_flags +) -> void { + let mesh_instance_index = thread_id.x; + if (mesh_instance_index >= mesh_instances_count) { + return; + } + + let mesh_instance = params.mesh_instance[mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; + let transform = params.transforms[mesh_instance.transform_index]; + + let aabb_min = mesh.bounds.aabb_min; + let 
aabb_max = mesh.bounds.aabb_max; + let aabb_extent = aabb_max - aabb_min; + let aabb_center = (aabb_min + aabb_max) * 0.5; + + var visible = true; + if (visible && (cull_flags & CullFlags::MeshletFrustum)) { + let cur_mvp = mul(params.camera.projection_view_mat, transform.world); + visible = test_frustum(cur_mvp, aabb_center, aabb_extent); + + var debug_aabb = DebugAABB(); + debug_aabb.position = mul(transform.world, f32x4(aabb_center, 1.0)).xyz; + debug_aabb.size = mul(transform.world, f32x4(aabb_extent, 0.0)).xyz; + debug_aabb.color = f32x3(0.0, 1.0, 0.0); + debug_aabb.coord = DebugDrawCoord::World; + debug_draw_aabb(params.debug_drawer[0], debug_aabb); + } + + if (visible) { + let index = std::atomic_add(params.cull_meshlets_cmd[0].x, mesh_lod.meshlet_count, std::memory_order_relaxed); + params.visible_mesh_instances_indices[index] = mesh_instance_index; + } +} \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index bee2218b..5ad3b95f 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -5,39 +5,19 @@ import cull; import debug_drawer; struct ShaderParameters { - RWStructuredBuffer cull_triangles_cmd; ConstantBuffer camera; - RWStructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; + StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; Image2D hiz_image; Sampler hiz_sampler; + + RWStructuredBuffer cull_triangles_cmd; + RWStructuredBuffer visible_meshlet_instances_indices; RWStructuredBuffer debug_drawer; }; -func test_frustum(in f32x4x4 mvp, in f32x3 aabb_center, in f32x3 aabb_extent) -> bool { - f32x4 planes[] = { - normalize_plane(mvp[3] + mvp[0]), - normalize_plane(mvp[3] - mvp[0]), - normalize_plane(mvp[3] + mvp[1]), - normalize_plane(mvp[3] - mvp[1]), - normalize_plane(mvp[2]) - }; - - let aabb_half_extent 
= aabb_extent * 0.5; - [[unroll]] - for (uint i = 0; i < planes.getCount(); i++) { - let flip = asuint(planes[i].xyz) & 0x80000000; - let sign_flipped = asfloat(asuint(aabb_half_extent) ^ flip); - if (dot(aabb_center + sign_flipped, planes[i].xyz) <= -planes[i].w) { - return false; - } - } - - return true; -} - #ifndef CULLING_MESHLET_COUNT #define CULLING_MESHLET_COUNT 64 #endif @@ -56,10 +36,11 @@ func cs_main( } let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let mesh = params.meshes[meshlet_instance.mesh_index]; - let mesh_lod = mesh.lods[meshlet_instance.lod_index]; + let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let transform = params.transforms[mesh_instance.transform_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; - let transform = params.transforms[meshlet_instance.transform_index]; let aabb_min = bounds.aabb_min; let aabb_max = bounds.aabb_max; diff --git a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang index 14372d60..e530f4ab 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang @@ -7,12 +7,14 @@ import scene; import passes.visbuffer; struct ShaderParameters { - RWStructuredBuffer draw_cmd; ConstantBuffer camera; StructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; + StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; + + RWStructuredBuffer draw_cmd; RWStructuredBuffer reordered_indices; }; @@ -117,13 +119,14 @@ func cs_main( let visible_meshlet_index = group_id.x; let meshlet_instance_index = params.visible_meshlet_instances_indices[visible_meshlet_index]; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let 
mesh = params.meshes[meshlet_instance.mesh_index]; - let mesh_lod = mesh.lods[meshlet_instance.lod_index]; + let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; if (local_index == 0) { triangles_passed_shared = 0; - let transform = params.transforms[meshlet_instance.transform_index]; + let transform = params.transforms[mesh_instance.transform_index]; model_view_proj_shared = mul(params.camera.projection_view_mat, transform.world); } diff --git a/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang b/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang index a56a3c89..3caf42ee 100644 --- a/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang +++ b/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang @@ -12,6 +12,9 @@ Image2D visbuffer_data; [[vk::binding(1, 0)]] StructuredBuffer meshlet_instances; +[[vk::binding(2, 0)]] +StructuredBuffer mesh_instances; + struct PushConstants { u32 *dst; u32x2 texel; @@ -30,5 +33,6 @@ func cs_main() -> void { let vis = VisBufferData(texel); let meshlet_instance = meshlet_instances[vis.meshlet_instance_index]; - *C.dst = meshlet_instance.transform_index; + let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; + *C.dst = mesh_instance.transform_index; } diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index 587ecaec..f388af73 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -8,12 +8,14 @@ import passes.visbuffer; #include struct ShaderParameters { - Image2D visbuffer; ConstantBuffer camera; StructuredBuffer meshlet_instances; + StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; 
StructuredBuffer materials; + + Image2D visbuffer; }; ParameterBlock params; @@ -95,10 +97,11 @@ func fs_main(VertexOutput input) -> FragmentOutput { let vis = VisBufferData(texel); let meshlet_instance_index = vis.meshlet_instance_index; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let mesh = params.meshes[meshlet_instance.mesh_index]; + let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; let material = params.materials[mesh.material_index]; - let transform = params.transforms[meshlet_instance.transform_index]; - let mesh_lod = mesh.lods[meshlet_instance.lod_index]; + let transform = params.transforms[mesh_instance.transform_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; let indices = meshlet.indices(mesh_lod, vis.triangle_index); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index 3f184d0d..38f3fc4d 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -8,6 +8,7 @@ import passes.visbuffer; struct ShaderParameters { ConstantBuffer camera; StructuredBuffer meshlet_instances; + StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; StructuredBuffer materials; @@ -28,9 +29,10 @@ struct VertexOutput { func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { let vis = VisBufferData(vertex_index); let meshlet_instance = params.meshlet_instances[vis.meshlet_instance_index]; - let transform = params.transforms[meshlet_instance.transform_index]; - let mesh = params.meshes[meshlet_instance.mesh_index]; - let mesh_lod = mesh.lods[meshlet_instance.lod_index]; + let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = 
params.meshes[mesh_instance.mesh_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; + let transform = params.transforms[mesh_instance.transform_index]; let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; let index = meshlet.index(mesh_lod, vis.triangle_index); diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index a1e34c79..d1216c1f 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -251,22 +251,27 @@ public struct Meshlet { } }; -public struct MeshletBounds { +public struct Bounds { public f32x3 aabb_min = {}; public f32x3 aabb_max = {}; }; public struct MeshletInstance { + public u32 mesh_instance_index = 0; + public u32 meshlet_index = 0; +}; + +public struct MeshInstance { public u32 mesh_index = 0; public u32 lod_index = 0; + public u32 material_index = 0; public u32 transform_index = 0; - public u32 meshlet_index = 0; }; public struct MeshLOD { public u32 *indices = nullptr; public Meshlet *meshlets = nullptr; - public MeshletBounds *meshlet_bounds = nullptr; + public Bounds *meshlet_bounds = nullptr; public u8 *local_triangle_indices = nullptr; public u32 *indirect_vertex_indices = nullptr; public u32 meshlet_count = 0; @@ -284,6 +289,7 @@ public struct Mesh { public u32 material_index = 0; public u32 lod_count = 0; public MeshLOD lods[MESH_MAX_LODS] = {}; + public Bounds bounds = {}; }; public struct Light { diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 4dad03f5..81de7e3f 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -150,23 +150,28 @@ struct Material { alignas(4) u32 occlusion_image_index = ~0_u32; }; -struct Meshlet { - alignas(4) u32 indirect_vertex_index_offset = 0; - alignas(4) u32 local_triangle_index_offset = 0; - alignas(4) u32 vertex_count = 0; - alignas(4) u32 triangle_count = 0; -}; - -struct MeshletBounds { +struct Bounds { alignas(4) glm::vec3 
aabb_min = {}; alignas(4) glm::vec3 aabb_max = {}; }; struct MeshletInstance { + alignas(4) u32 mesh_instance_index = 0; + alignas(4) u32 meshlet_index = 0; +}; + +struct MeshInstance { alignas(4) u32 mesh_index = 0; alignas(4) u32 lod_index = 0; + alignas(4) u32 material_index = 0; alignas(4) u32 transform_index = 0; - alignas(4) u32 meshlet_index = 0; +}; + +struct Meshlet { + alignas(4) u32 indirect_vertex_index_offset = 0; + alignas(4) u32 local_triangle_index_offset = 0; + alignas(4) u32 vertex_count = 0; + alignas(4) u32 triangle_count = 0; }; struct MeshLOD { @@ -185,9 +190,10 @@ struct Mesh { alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; alignas(8) u64 texture_coords = 0; - alignas(4) u32 material_index = 0; + alignas(4) u32 _padding = 0; alignas(4) u32 lod_count = 0; alignas(8) MeshLOD lods[MAX_LODS] = {}; + alignas(4) Bounds bounds = {}; }; constexpr static u32 HISTOGRAM_THREADS_X = 16; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 7021cf74..b1ab0324 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -728,27 +728,35 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { auto &app = Application::get(); auto gpu_meshes = std::vector(); + auto gpu_mesh_instances = std::vector(); auto gpu_meshlet_instances = std::vector(); for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { auto *model = app.asset_man.get_model(rendering_mesh.n0); const auto &mesh = model->meshes[rendering_mesh.n1]; - // ── INSTANCING ────────────────────────────────────────────────────── for (auto primitive_index : mesh.primitive_indices) { + const auto &primitive = model->primitives[primitive_index]; const auto &gpu_mesh = model->gpu_meshes[primitive_index]; auto mesh_index = static_cast(gpu_meshes.size()); gpu_meshes.emplace_back(gpu_mesh); + // ── INSTANCING ────────────────────────────────────────────────── for (const auto transform_id : transform_ids) { + auto 
mesh_instance_index = static_cast(gpu_mesh_instances.size()); + auto lod_index = gpu_mesh.lod_count - 1; const auto &lod = gpu_mesh.lods[lod_index]; + auto &mesh_instance = gpu_mesh_instances.emplace_back(); + mesh_instance.mesh_index = mesh_index; + mesh_instance.lod_index = lod_index; + mesh_instance.material_index = SlotMap_decode_id(primitive.material_id).index; + mesh_instance.transform_index = SlotMap_decode_id(transform_id).index; + for (u32 meshlet_index = 0; meshlet_index < lod.meshlet_count; meshlet_index++) { auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); - meshlet_instance.mesh_index = mesh_index; - meshlet_instance.lod_index = lod_index; - meshlet_instance.transform_index = SlotMap_decode_id(transform_id).index; + meshlet_instance.mesh_instance_index = mesh_instance_index; meshlet_instance.meshlet_index = meshlet_index; } } @@ -757,6 +765,7 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { return SceneComposeInfo{ .gpu_meshes = std::move(gpu_meshes), + .gpu_mesh_instances = std::move(gpu_mesh_instances), .gpu_meshlet_instances = std::move(gpu_meshlet_instances), }; } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 52f0514d..3406124a 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -237,42 +237,57 @@ auto SceneRenderer::compose(this SceneRenderer &self, SceneComposeInfo &compose_ // IMPORTANT: Only wait when buffer is being resized!!! // We can still copy into gpu buffer if it has enough space. 
- if (ls::size_bytes(compose_info.gpu_meshes) > self.meshes_buffer.data_size()) { - if (self.meshes_buffer) { + if (ls::size_bytes(compose_info.gpu_meshlet_instances) > self.meshlet_instances_buffer.data_size()) { + if (self.meshlet_instances_buffer) { self.device->wait(); - self.device->destroy(self.meshes_buffer.id()); + self.device->destroy(self.meshlet_instances_buffer.id()); } - self.meshes_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshes)).value(); + self.meshlet_instances_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshlet_instances)).value(); } - if (ls::size_bytes(compose_info.gpu_meshlet_instances) > self.meshlet_instances_buffer.data_size()) { - if (self.meshlet_instances_buffer) { + if (ls::size_bytes(compose_info.gpu_mesh_instances) > self.mesh_instances_buffer.data_size()) { + if (self.mesh_instances_buffer) { self.device->wait(); - self.device->destroy(self.meshlet_instances_buffer.id()); + self.device->destroy(self.mesh_instances_buffer.id()); } - self.meshlet_instances_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshlet_instances)).value(); + self.mesh_instances_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_mesh_instances)).value(); } - self.meshlet_instance_count = compose_info.gpu_meshlet_instances.size(); - auto meshes_buffer = vuk::Value{}; - if (!compose_info.gpu_meshes.empty()) { - meshes_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_meshes), self.meshes_buffer); + if (ls::size_bytes(compose_info.gpu_meshes) > self.meshes_buffer.data_size()) { + if (self.meshes_buffer) { + self.device->wait(); + self.device->destroy(self.meshes_buffer.id()); + } + + self.meshes_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshes)).value(); } + self.meshlet_instance_count = compose_info.gpu_meshlet_instances.size(); auto meshlet_instances_buffer = vuk::Value{}; if (!compose_info.gpu_meshlet_instances.empty()) { 
meshlet_instances_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_meshlet_instances), self.meshlet_instances_buffer); } + auto mesh_instances_buffer = vuk::Value{}; + if (!compose_info.gpu_mesh_instances.empty()) { + mesh_instances_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_mesh_instances), self.mesh_instances_buffer); + } + + auto meshes_buffer = vuk::Value{}; + if (!compose_info.gpu_meshes.empty()) { + meshes_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_meshes), self.meshes_buffer); + } + if (self.exposure_buffer) { vuk::fill(vuk::acquire_buf("exposure", *self.device->buffer(self.exposure_buffer.id()), vuk::eNone), 0); } return ComposedScene{ - .meshes_buffer = meshes_buffer, .meshlet_instances_buffer = meshlet_instances_buffer, + .mesh_instances_buffer = mesh_instances_buffer, + .meshes_buffer = meshes_buffer, }; } @@ -293,6 +308,11 @@ auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { self.meshlet_instances_buffer = {}; } + if (self.mesh_instances_buffer) { + self.device->destroy(self.mesh_instances_buffer.id()); + self.mesh_instances_buffer = {}; + } + if (self.meshes_buffer) { self.device->destroy(self.meshes_buffer.id()); self.meshes_buffer = {}; @@ -519,14 +539,17 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: } if (self.meshlet_instance_count) { - auto meshes_buffer = vuk::Value{}; auto meshlet_instances_buffer = vuk::Value{}; + auto mesh_instances_buffer = vuk::Value{}; + auto meshes_buffer = vuk::Value{}; if (composed_scene.has_value()) { - meshes_buffer = std::move(composed_scene->meshes_buffer); meshlet_instances_buffer = std::move(composed_scene->meshlet_instances_buffer); + mesh_instances_buffer = std::move(composed_scene->mesh_instances_buffer); + meshes_buffer = std::move(composed_scene->meshes_buffer); } else { + meshlet_instances_buffer = self.meshlet_instances_buffer.acquire(*self.device, "meshlet instances", vuk::Access::eNone); + 
mesh_instances_buffer = self.mesh_instances_buffer.acquire(*self.device, "mesh instances", vuk::Access::eNone); meshes_buffer = self.meshes_buffer.acquire(*self.device, "meshes", vuk::Access::eNone); - meshlet_instances_buffer = self.meshlet_instances_buffer.acquire(*self.device, "Meshlet Instances", vuk::Access::eNone); } auto materials_buffer = std::move(info.materials_buffer); @@ -537,37 +560,40 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: "vis cull meshlets", [meshlet_instance_count = self.meshlet_instance_count, cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, - VUK_BA(vuk::eComputeRW) cull_triangles_cmd, VUK_BA(vuk::eComputeRead) camera, - VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, + VUK_BA(vuk::eComputeRead) mesh_instances, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, VUK_IA(vuk::eComputeRead) hiz, + VUK_BA(vuk::eComputeRW) cull_triangles_cmd, + VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRW) debug_drawer ) { cmd_list // .bind_compute_pipeline("passes.cull_meshlets") - .bind_buffer(0, 0, cull_triangles_cmd) - .bind_buffer(0, 1, camera) - .bind_buffer(0, 2, visible_meshlet_instances_indices) - .bind_buffer(0, 3, meshlet_instances) - .bind_buffer(0, 4, meshes) - .bind_buffer(0, 5, transforms) - .bind_image(0, 6, hiz) - .bind_sampler(0, 7, hiz_sampler_info) - .bind_buffer(0, 8, debug_drawer) + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) + .bind_buffer(0, 3, meshes) + .bind_buffer(0, 4, transforms) + .bind_image(0, 5, hiz) + .bind_sampler(0, 6, hiz_sampler_info) + .bind_buffer(0, 7, cull_triangles_cmd) + .bind_buffer(0, 8, visible_meshlet_instances_indices) + .bind_buffer(0, 9, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(meshlet_instance_count, cull_flags)) - 
.dispatch((meshlet_instance_count + Model::MAX_MESHLET_INDICES - 1) / Model::MAX_MESHLET_INDICES); + .dispatch_invocations(meshlet_instance_count); return std::make_tuple( - cull_triangles_cmd, camera, - visible_meshlet_instances_indices, meshlet_instances, + mesh_instances, meshes, transforms, hiz, + cull_triangles_cmd, + visible_meshlet_instances_indices, debug_drawer ); } @@ -578,23 +604,25 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, self.meshlet_instance_count * sizeof(u32)); std::tie( - cull_triangles_cmd_buffer, camera_buffer, - visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, + mesh_instances_buffer, meshes_buffer, transforms_buffer, hiz_attachment, + cull_triangles_cmd_buffer, + visible_meshlet_instances_indices_buffer, debug_drawer_buffer ) = vis_cull_meshlets_pass( - std::move(cull_triangles_cmd_buffer), std::move(camera_buffer), - std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), std::move(meshes_buffer), std::move(transforms_buffer), std::move(hiz_attachment), + std::move(cull_triangles_cmd_buffer), + std::move(visible_meshlet_instances_indices_buffer), std::move(debug_drawer_buffer) ); @@ -604,33 +632,36 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: [cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, - VUK_BA(vuk::eComputeRW) draw_indexed_cmd, VUK_BA(vuk::eComputeRead) camera, VUK_BA(vuk::eComputeRead) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, + VUK_BA(vuk::eComputeRead) mesh_instances, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, + VUK_BA(vuk::eComputeRW) draw_indexed_cmd, VUK_BA(vuk::eComputeWrite) reordered_indices ) { cmd_list // .bind_compute_pipeline("passes.cull_triangles") - .bind_buffer(0, 
0, draw_indexed_cmd) - .bind_buffer(0, 1, camera) - .bind_buffer(0, 2, visible_meshlet_instances_indices) - .bind_buffer(0, 3, meshlet_instances) + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, visible_meshlet_instances_indices) + .bind_buffer(0, 2, meshlet_instances) + .bind_buffer(0, 3, mesh_instances) .bind_buffer(0, 4, meshes) .bind_buffer(0, 5, transforms) - .bind_buffer(0, 6, reordered_indices) + .bind_buffer(0, 6, draw_indexed_cmd) + .bind_buffer(0, 7, reordered_indices) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) .dispatch_indirect(cull_triangles_cmd); return std::make_tuple( - draw_indexed_cmd, camera, visible_meshlet_instances_indices, meshlet_instances, + mesh_instances, meshes, transforms, + draw_indexed_cmd, reordered_indices ); } @@ -643,22 +674,24 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: ); std::tie( - draw_command_buffer, camera_buffer, visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, + mesh_instances_buffer, meshes_buffer, transforms_buffer, + draw_command_buffer, reordered_indices_buffer ) = vis_cull_triangles_pass( std::move(cull_triangles_cmd_buffer), - std::move(draw_command_buffer), std::move(camera_buffer), std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), std::move(meshes_buffer), std::move(transforms_buffer), + std::move(draw_command_buffer), std::move(reordered_indices_buffer) ); @@ -710,13 +743,14 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) triangle_indirect, VUK_BA(vuk::eIndexRead) index_buffer, - VUK_IA(vuk::eColorRW) visbuffer, - VUK_IA(vuk::eDepthStencilRW) depth, VUK_BA(vuk::eVertexRead) camera, VUK_BA(vuk::eVertexRead) meshlet_instances, - VUK_BA(vuk::eVertexRead) transforms, + VUK_BA(vuk::eVertexRead) mesh_instances, VUK_BA(vuk::eVertexRead) meshes, + VUK_BA(vuk::eVertexRead) 
transforms, VUK_BA(vuk::eFragmentRead) materials, + VUK_IA(vuk::eColorRW) visbuffer, + VUK_IA(vuk::eDepthStencilRW) depth, VUK_IA(vuk::eFragmentRW) overdraw ) { cmd_list // @@ -730,37 +764,40 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .bind_persistent(1, *descriptor_set) .bind_buffer(0, 0, camera) .bind_buffer(0, 1, meshlet_instances) - .bind_buffer(0, 2, meshes) - .bind_buffer(0, 3, transforms) - .bind_buffer(0, 4, materials) - .bind_image(0, 5, overdraw) + .bind_buffer(0, 2, mesh_instances) + .bind_buffer(0, 3, meshes) + .bind_buffer(0, 4, transforms) + .bind_buffer(0, 5, materials) + .bind_image(0, 6, overdraw) .bind_index_buffer(index_buffer, vuk::IndexType::eUint32) .draw_indexed_indirect(1, triangle_indirect); - return std::make_tuple(visbuffer, depth, camera, meshlet_instances, transforms, meshes, materials, overdraw); + return std::make_tuple(camera, meshlet_instances, mesh_instances, meshes, transforms, materials, visbuffer, depth, overdraw); } ); std::tie( - visbuffer_attachment, - depth_attachment, camera_buffer, meshlet_instances_buffer, - transforms_buffer, + mesh_instances_buffer, meshes_buffer, + transforms_buffer, materials_buffer, + visbuffer_attachment, + depth_attachment, overdraw_attachment ) = vis_encode_pass( std::move(draw_command_buffer), std::move(reordered_indices_buffer), - std::move(visbuffer_attachment), - std::move(depth_attachment), std::move(camera_buffer), std::move(meshlet_instances_buffer), - std::move(transforms_buffer), + std::move(mesh_instances_buffer), std::move(meshes_buffer), + std::move(transforms_buffer), std::move(materials_buffer), + std::move(visbuffer_attachment), + std::move(depth_attachment), std::move(overdraw_attachment) ); @@ -772,12 +809,14 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: vuk::CommandBuffer &cmd_list, VUK_IA(vuk::eComputeSampled) visbuffer, VUK_BA(vuk::eComputeRead) meshlet_instances, + VUK_BA(vuk::eComputeRead) 
mesh_instances, VUK_BA(vuk::eComputeWrite) picked_transform_index_buffer ) { cmd_list // .bind_compute_pipeline("passes.editor_mousepick") .bind_image(0, 0, visbuffer) .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) .push_constants( vuk::ShaderStageFlagBits::eCompute, 0, @@ -790,7 +829,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: ); auto picking_texel_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUtoCPU, sizeof(u32)); - auto picked_texel = editor_mousepick_pass(visbuffer_attachment, meshlet_instances_buffer, picking_texel_buffer); + auto picked_texel = editor_mousepick_pass(visbuffer_attachment, meshlet_instances_buffer, mesh_instances_buffer, picking_texel_buffer); vuk::Compiler temp_compiler; picked_texel.wait(self.device->get_allocator(), temp_compiler); @@ -842,16 +881,17 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: "vis decode", [descriptor_set = materials_set]( // vuk::CommandBuffer &cmd_list, - VUK_IA(vuk::eColorRW) albedo, - VUK_IA(vuk::eColorRW) normal, - VUK_IA(vuk::eColorRW) emissive, - VUK_IA(vuk::eColorRW) metallic_roughness_occlusion, - VUK_IA(vuk::eFragmentRead) visbuffer, VUK_BA(vuk::eFragmentRead) camera, VUK_BA(vuk::eFragmentRead) meshlet_instances, + VUK_BA(vuk::eFragmentRead) mesh_instances, VUK_BA(vuk::eFragmentRead) meshes, VUK_BA(vuk::eFragmentRead) transforms, - VUK_BA(vuk::eFragmentRead) materials + VUK_BA(vuk::eFragmentRead) materials, + VUK_IA(vuk::eFragmentRead) visbuffer, + VUK_IA(vuk::eColorRW) albedo, + VUK_IA(vuk::eColorRW) normal, + VUK_IA(vuk::eColorRW) emissive, + VUK_IA(vuk::eColorRW) metallic_roughness_occlusion ) { cmd_list // .bind_graphics_pipeline("passes.visbuffer_decode") @@ -865,24 +905,26 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .set_viewport(0, vuk::Rect2D::framebuffer()) .set_scissor(0, vuk::Rect2D::framebuffer()) .bind_persistent(1, *descriptor_set) - 
.bind_image(0, 0, visbuffer) - .bind_buffer(0, 1, camera) - .bind_buffer(0, 2, meshlet_instances) + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) .bind_buffer(0, 3, meshes) .bind_buffer(0, 4, transforms) .bind_buffer(0, 5, materials) + .bind_image(0, 6, visbuffer) .draw(3, 1, 0, 1); return std::make_tuple( - albedo, - normal, - emissive, - metallic_roughness_occlusion, - visbuffer, camera, meshlet_instances, + mesh_instances, meshes, - transforms + transforms, + visbuffer, + albedo, + normal, + emissive, + metallic_roughness_occlusion ); } ); @@ -924,27 +966,29 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: metallic_roughness_occlusion_attachment = vuk::clear_image(std::move(metallic_roughness_occlusion_attachment), vuk::Black); std::tie( - albedo_attachment, - normal_attachment, - emissive_attachment, - metallic_roughness_occlusion_attachment, - visbuffer_attachment, camera_buffer, meshlet_instances_buffer, + mesh_instances_buffer, meshes_buffer, - transforms_buffer + transforms_buffer, + visbuffer_attachment, + albedo_attachment, + normal_attachment, + emissive_attachment, + metallic_roughness_occlusion_attachment ) = vis_decode_pass( - std::move(albedo_attachment), - std::move(normal_attachment), - std::move(emissive_attachment), - std::move(metallic_roughness_occlusion_attachment), - std::move(visbuffer_attachment), std::move(camera_buffer), std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), std::move(meshes_buffer), std::move(transforms_buffer), - std::move(materials_buffer) + std::move(materials_buffer), + std::move(visbuffer_attachment), + std::move(albedo_attachment), + std::move(normal_attachment), + std::move(emissive_attachment), + std::move(metallic_roughness_occlusion_attachment) ); if (info.atmosphere.has_value()) { diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 060bb9ad..1abd4ddb 100644 --- 
a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -7,12 +7,14 @@ namespace lr { struct SceneComposeInfo { std::vector gpu_meshes = {}; + std::vector gpu_mesh_instances = {}; std::vector gpu_meshlet_instances = {}; }; struct ComposedScene { - vuk::Value meshes_buffer = {}; vuk::Value meshlet_instances_buffer = {}; + vuk::Value mesh_instances_buffer = {}; + vuk::Value meshes_buffer = {}; }; struct SceneRenderInfo { @@ -43,8 +45,9 @@ struct SceneRenderer { Buffer exposure_buffer = {}; Buffer transforms_buffer = {}; u32 meshlet_instance_count = 0; - Buffer meshes_buffer = {}; Buffer meshlet_instances_buffer = {}; + Buffer mesh_instances_buffer = {}; + Buffer meshes_buffer = {}; // Then what are they? // TODO: Per scene sky settings From 20f0f4678d2c432e1120c749fa11bb63a658dbb1 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Mon, 28 Jul 2025 19:24:26 +0300 Subject: [PATCH 10/16] mesh culling --- Lorr/Engine/Asset/Asset.cc | 2 +- .../shaders/passes/cull_meshes.slang | 16 +++-- .../shaders/passes/cull_meshlets.slang | 7 -- Lorr/Engine/Scene/Scene.cc | 2 +- Lorr/Engine/Scene/SceneRenderer.cc | 71 +++++++++++++++++-- Lorr/Engine/Scene/SceneRenderer.hh | 1 + 6 files changed, 81 insertions(+), 18 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 303b4297..6edf2b13 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -922,7 +922,7 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { meshlet_aabb.aabb_max = meshlet_bb_max; gpu_mesh.bounds.aabb_max = glm::max(gpu_mesh.bounds.aabb_max, meshlet_bb_max); - gpu_mesh.bounds.aabb_min = glm::max(gpu_mesh.bounds.aabb_min, meshlet_bb_min); + gpu_mesh.bounds.aabb_min = glm::min(gpu_mesh.bounds.aabb_min, meshlet_bb_min); } auto lod_upload_size = 0 // diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index 05b2b4e0..119b9083 100644 
--- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -5,12 +5,13 @@ import cull; import debug_drawer; struct ShaderParameters { - RWStructuredBuffer cull_meshlets_cmd; ConstantBuffer camera; - RWStructuredBuffer visible_mesh_instances_indices; - StructuredBuffer mesh_instance; + StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; + + RWStructuredBuffer cull_meshlets_cmd; + RWStructuredBuffer visible_mesh_instances_indices; RWStructuredBuffer debug_drawer; }; @@ -18,6 +19,10 @@ struct ShaderParameters { #define CULLING_MESHES_COUNT 64 #endif +#ifndef CULLING_MESHLET_COUNT + #define CULLING_MESHLET_COUNT 64 +#endif + [[shader("compute")]] [[numthreads(CULLING_MESHES_COUNT, 1, 1)]] func cs_main( @@ -31,7 +36,7 @@ func cs_main( return; } - let mesh_instance = params.mesh_instance[mesh_instance_index]; + let mesh_instance = params.mesh_instances[mesh_instance_index]; let mesh = params.meshes[mesh_instance.mesh_index]; let mesh_lod = mesh.lods[mesh_instance.lod_index]; let transform = params.transforms[mesh_instance.transform_index]; @@ -55,7 +60,8 @@ func cs_main( } if (visible) { - let index = std::atomic_add(params.cull_meshlets_cmd[0].x, mesh_lod.meshlet_count, std::memory_order_relaxed); + let workgroup_count = (mesh_lod.meshlet_count + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; + let index = std::atomic_add(params.cull_meshlets_cmd[0].x, workgroup_count, std::memory_order_relaxed); params.visible_mesh_instances_indices[index] = mesh_instance_index; } } \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index 5ad3b95f..f5aa2d46 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -51,13 +51,6 @@ func cs_main( if (visible && (cull_flags & CullFlags::MeshletFrustum)) 
{ let cur_mvp = mul(params.camera.projection_view_mat, transform.world); visible = test_frustum(cur_mvp, aabb_center, aabb_extent); - - var debug_aabb = DebugAABB(); - debug_aabb.position = mul(transform.world, f32x4(aabb_center, 1.0)).xyz; - debug_aabb.size = mul(transform.world, f32x4(aabb_extent, 0.0)).xyz; - debug_aabb.color = f32x3(0.0, 1.0, 0.0); - debug_aabb.coord = DebugDrawCoord::World; - debug_draw_aabb(params.debug_drawer[0], debug_aabb); } if (visible && (cull_flags & CullFlags::Occlusion)) { diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index b1ab0324..dbcbea45 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -745,7 +745,7 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { for (const auto transform_id : transform_ids) { auto mesh_instance_index = static_cast(gpu_mesh_instances.size()); - auto lod_index = gpu_mesh.lod_count - 1; + auto lod_index = 0; const auto &lod = gpu_mesh.lods[lod_index]; auto &mesh_instance = gpu_mesh_instances.emplace_back(); diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 3406124a..b7c9bc18 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -33,6 +33,7 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi // ── EDITOR ────────────────────────────────────────────────────────── auto default_slang_session = self.device->new_slang_session({ .definitions = { + { "CULLING_MESH_COUNT", "64" }, { "CULLING_MESHLET_COUNT", std::to_string(Model::MAX_MESHLET_INDICES) }, { "CULLING_TRIANGLE_COUNT", std::to_string(Model::MAX_MESHLET_PRIMITIVES) }, { "MESH_MAX_LODS", std::to_string(GPU::Mesh::MAX_LODS) }, @@ -100,6 +101,12 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi Pipeline::create(*self.device, default_slang_session, sky_final_pipeline_info).value(); // ── VISBUFFER ─────────────────────────────────────────────────────── + auto 
vis_cull_meshes_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.cull_meshes", + .entry_points = { "cs_main" }, + }; + Pipeline::create(*self.device, default_slang_session, vis_cull_meshes_pipeline_info).value(); + auto vis_cull_meshlets_pipeline_info = PipelineCompileInfo{ .module_name = "passes.cull_meshlets", .entry_points = { "cs_main" }, @@ -264,7 +271,6 @@ auto SceneRenderer::compose(this SceneRenderer &self, SceneComposeInfo &compose_ self.meshes_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshes)).value(); } - self.meshlet_instance_count = compose_info.gpu_meshlet_instances.size(); auto meshlet_instances_buffer = vuk::Value{}; if (!compose_info.gpu_meshlet_instances.empty()) { meshlet_instances_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_meshlet_instances), self.meshlet_instances_buffer); @@ -284,6 +290,9 @@ auto SceneRenderer::compose(this SceneRenderer &self, SceneComposeInfo &compose_ vuk::fill(vuk::acquire_buf("exposure", *self.device->buffer(self.exposure_buffer.id()), vuk::eNone), 0); } + self.meshlet_instance_count = compose_info.gpu_meshlet_instances.size(); + self.mesh_instance_count = compose_info.gpu_mesh_instances.size(); + return ComposedScene{ .meshlet_instances_buffer = meshlet_instances_buffer, .mesh_instances_buffer = mesh_instances_buffer, @@ -296,7 +305,7 @@ auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { self.device->wait(); - self.meshlet_instance_count = 0; + self.mesh_instance_count = 0; if (self.transforms_buffer) { self.device->destroy(self.transforms_buffer.id()); @@ -538,7 +547,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: camera_buffer = transfer_man.scratch_buffer(info.camera.value()); } - if (self.meshlet_instance_count) { + if (self.mesh_instance_count) { auto meshlet_instances_buffer = vuk::Value{}; auto mesh_instances_buffer = vuk::Value{}; auto meshes_buffer = vuk::Value{}; @@ -555,11 +564,64 @@ auto 
SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: auto materials_buffer = std::move(info.materials_buffer); auto *materials_set = info.materials_descriptor_set; + // ── CULL MESHES ───────────────────────────────────────────────────── + auto vis_cull_meshes_pass = vuk::make_pass( + "vis cull meshes", + [mesh_instance_count = self.mesh_instance_count, cull_flags = info.cull_flags]( + vuk::CommandBuffer &cmd_list, + VUK_BA(vuk::eComputeRead) camera, + VUK_BA(vuk::eComputeRead) mesh_instances, + VUK_BA(vuk::eComputeRead) meshes, + VUK_BA(vuk::eComputeRead) transforms, + VUK_BA(vuk::eComputeRW) cull_meshlets_cmd, + VUK_BA(vuk::eComputeWrite) visible_mesh_instances_indices, + VUK_BA(vuk::eComputeRW) debug_drawer + ) { + cmd_list // + .bind_compute_pipeline("passes.cull_meshes") + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, mesh_instances) + .bind_buffer(0, 2, meshes) + .bind_buffer(0, 3, transforms) + .bind_buffer(0, 4, cull_meshlets_cmd) + .bind_buffer(0, 5, visible_mesh_instances_indices) + .bind_buffer(0, 6, debug_drawer) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) + .dispatch_invocations(mesh_instance_count); + + return std::make_tuple(camera, mesh_instances, meshes, transforms, cull_meshlets_cmd, visible_mesh_instances_indices, debug_drawer); + } + ); + + auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); + auto visible_mesh_instances_indices_buffer = + transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, self.mesh_instance_count * sizeof(u32)); + + std::tie( + camera_buffer, + mesh_instances_buffer, + meshes_buffer, + transforms_buffer, + cull_meshlets_cmd_buffer, + visible_mesh_instances_indices_buffer, + debug_drawer_buffer + ) = + vis_cull_meshes_pass( + std::move(camera_buffer), + std::move(mesh_instances_buffer), + std::move(meshes_buffer), + std::move(transforms_buffer), + std::move(cull_meshlets_cmd_buffer), + 
std::move(visible_mesh_instances_indices_buffer), + std::move(debug_drawer_buffer) + ); + // ── CULL MESHLETS ─────────────────────────────────────────────────── auto vis_cull_meshlets_pass = vuk::make_pass( "vis cull meshlets", [meshlet_instance_count = self.meshlet_instance_count, cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, + VUK_BA(vuk::eIndirectRead) dispatch_cmd, VUK_BA(vuk::eComputeRead) camera, VUK_BA(vuk::eComputeRead) meshlet_instances, VUK_BA(vuk::eComputeRead) mesh_instances, @@ -583,7 +645,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .bind_buffer(0, 8, visible_meshlet_instances_indices) .bind_buffer(0, 9, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(meshlet_instance_count, cull_flags)) - .dispatch_invocations(meshlet_instance_count); + .dispatch_indirect(dispatch_cmd); return std::make_tuple( camera, @@ -615,6 +677,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: debug_drawer_buffer ) = vis_cull_meshlets_pass( + std::move(cull_meshlets_cmd_buffer), std::move(camera_buffer), std::move(meshlet_instances_buffer), std::move(mesh_instances_buffer), diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 1abd4ddb..7c5bc174 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -46,6 +46,7 @@ struct SceneRenderer { Buffer transforms_buffer = {}; u32 meshlet_instance_count = 0; Buffer meshlet_instances_buffer = {}; + u32 mesh_instance_count = 0; Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; From cf14e7bc91c296207622d7138fa635926f8f914d Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 3 Aug 2025 20:26:14 +0300 Subject: [PATCH 11/16] refactor gpu buffers and materials --- Lorr/Engine/Asset/Asset.cc | 192 ++------- Lorr/Engine/Asset/Asset.hh | 6 +- Lorr/Engine/Core/JobManager.cc | 6 +- 
Lorr/Engine/Graphics/Vulkan.hh | 6 +- Lorr/Engine/Graphics/Vulkan/Buffer.cc | 13 + Lorr/Engine/Resources/shaders/assert.slang | 7 + .../shaders/passes/cull_meshes.slang | 4 +- .../shaders/passes/cull_meshlets.slang | 5 +- .../Resources/shaders/passes/debug.slang | 26 +- .../shaders/passes/sky_transmittance.slang | 2 +- .../shaders/passes/visbuffer_decode.slang | 2 +- .../shaders/passes/visbuffer_encode.slang | 6 +- Lorr/Engine/Resources/shaders/scene.slang | 11 +- Lorr/Engine/Scene/GPUScene.hh | 8 +- Lorr/Engine/Scene/Scene.cc | 150 +++++--- Lorr/Engine/Scene/Scene.hh | 2 +- Lorr/Engine/Scene/SceneRenderer.cc | 364 ++++++++++-------- Lorr/Engine/Scene/SceneRenderer.hh | 51 ++- shell.nix | 30 +- 19 files changed, 469 insertions(+), 422 deletions(-) create mode 100644 Lorr/Engine/Resources/shaders/assert.slang diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 6edf2b13..94cb3598 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -123,12 +123,11 @@ struct Handle::Impl { std::shared_mutex textures_mutex = {}; SlotMap textures = {}; + std::vector dirty_textures = {}; std::shared_mutex materials_mutex = {}; - vuk::PersistentDescriptorSet materials_descriptor_set = {}; SlotMap materials = {}; std::vector dirty_materials = {}; - Buffer materials_buffer = {}; SlotMap, SceneID> scenes = {}; }; @@ -142,12 +141,6 @@ auto AssetManager::create(Device *device) -> AssetManager { impl->device = device; impl->root_path = fs::current_path(); - BindlessDescriptorInfo bindless_set_info[] = { - { .binding = 0, .type = vuk::DescriptorType::eSampler, .descriptor_count = 1024 }, - { .binding = 1, .type = vuk::DescriptorType::eSampledImage, .descriptor_count = 1024 }, - }; - impl->materials_descriptor_set = device->create_persistent_descriptor_set(bindless_set_info, 1).release(); - return self; } @@ -155,9 +148,6 @@ auto AssetManager::destroy() -> void { ZoneScoped; auto read_lock = std::shared_lock(impl->registry_mutex); - if 
(impl->materials_buffer) { - impl->device->destroy(impl->materials_buffer.id()); - } for (const auto &[asset_uuid, asset] : impl->registry) { // sanity check @@ -955,7 +945,11 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { std::memcpy(cpu_lod_ptr + upload_offset, indirect_vertex_indices.data(), ls::size_bytes(indirect_vertex_indices)); upload_offset += ls::size_bytes(indirect_vertex_indices); + cur_lod.indices_count = simplified_indices.size(); cur_lod.meshlet_count = meshlet_count; + cur_lod.meshlet_bounds_count = meshlet_bounds.size(); + cur_lod.local_triangle_indices_count = local_triangle_indices.size(); + cur_lod.indirect_vertex_indices_count = indirect_vertex_indices.size(); lod_cpu_buffers[lod_index] = ls::pair(cpu_lod_buffer, lod_upload_size); upload_size += lod_upload_size; @@ -1200,6 +1194,8 @@ auto AssetManager::load_texture(const UUID &uuid, const TextureInfo &info) -> bo }; auto sampler = Sampler::create(*impl->device, sampler_info).value(); asset->texture_id = impl->textures.create_slot(Texture{ .image = image, .image_view = image_view, .sampler = sampler }); + write_lock.unlock(); + this->set_texture_dirty(asset->texture_id); } LOG_TRACE("Loaded texture {}.", uuid.str()); @@ -1231,7 +1227,7 @@ auto AssetManager::unload_texture(const UUID &uuid) -> bool { auto AssetManager::is_texture_loaded(const UUID &uuid) -> bool { ZoneScoped; - std::shared_lock _(impl->textures_mutex); + auto read_lock = std::shared_lock(impl->textures_mutex); auto *asset = this->get_asset(uuid); if (!asset) { return false; @@ -1649,166 +1645,56 @@ auto AssetManager::get_scene(SceneID scene_id) -> Scene * { return impl->scenes.slot(scene_id)->get(); } -auto AssetManager::set_material_dirty(MaterialID material_id) -> void { +auto AssetManager::set_texture_dirty(TextureID texture_id) -> void { ZoneScoped; - std::shared_lock shared_lock(impl->materials_mutex); - if (std::ranges::find(impl->dirty_materials, material_id) != impl->dirty_materials.end()) { + auto read_lock 
= std::shared_lock(impl->textures_mutex); + if (std::ranges::find(impl->dirty_textures, texture_id) != impl->dirty_textures.end()) { return; } - shared_lock.unlock(); - impl->materials_mutex.lock(); - impl->dirty_materials.emplace_back(material_id); - impl->materials_mutex.unlock(); + read_lock.unlock(); + auto write_lock = std::unique_lock(impl->textures_mutex); + impl->dirty_textures.emplace_back(texture_id); } -auto AssetManager::get_materials_buffer() -> vuk::Value { +auto AssetManager::get_dirty_texture_ids() -> std::vector { ZoneScoped; - auto uuid_to_index = [this](UUID &uuid) -> ls::option { - if (!this->is_texture_loaded(uuid)) { - return ls::nullopt; - } - - auto *texture_asset = this->get_asset(uuid); - auto *texture = this->get_texture(texture_asset->texture_id); - auto texture_index = SlotMap_decode_id(texture_asset->texture_id).index; - auto image_view = impl->device->image_view(texture->image_view.id()); - auto sampler = impl->device->sampler(texture->sampler.id()); - - impl->materials_descriptor_set.update_sampler(0, texture_index, sampler.value()); - impl->materials_descriptor_set.update_sampled_image(1, texture_index, image_view.value(), vuk::ImageLayout::eShaderReadOnlyOptimal); - - return texture_index; - }; - - auto to_gpu_material = [&](Material *material) -> GPU::Material { - auto albedo_image_index = uuid_to_index(material->albedo_texture); - auto normal_image_index = uuid_to_index(material->normal_texture); - auto emissive_image_index = uuid_to_index(material->emissive_texture); - auto metallic_roughness_image_index = uuid_to_index(material->metallic_roughness_texture); - auto occlusion_image_index = uuid_to_index(material->occlusion_texture); - - auto flags = GPU::MaterialFlag::None; - // flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; - // flags |= normal_image_index.has_value() ? 
GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; - // flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; - // flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; - // flags |= occlusion_image_index.has_value() ? GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; - // flags |= GPU::MaterialFlag::NormalFlipY; - - return { - .albedo_color = material->albedo_color, - .emissive_color = material->emissive_color, - .roughness_factor = material->roughness_factor, - .metallic_factor = material->metallic_factor, - .alpha_cutoff = material->alpha_cutoff, - .flags = flags, - .albedo_image_index = albedo_image_index.value_or(~0_u32), - .normal_image_index = normal_image_index.value_or(~0_u32), - .emissive_image_index = emissive_image_index.value_or(~0_u32), - .metallic_roughness_image_index = metallic_roughness_image_index.value_or(~0_u32), - .occlusion_image_index = occlusion_image_index.value_or(~0_u32), - }; - }; - - auto all_materials_count = 0_sz; - auto dirty_materials = std::vector(); - { - auto read_lock = std::shared_lock(impl->materials_mutex); - if (impl->materials.size() == 0) { - return {}; - } - - read_lock.unlock(); - auto write_lock = std::unique_lock(impl->materials_mutex); - - all_materials_count = impl->materials.size(); - - // DO NOT MOVE!!! 
just take a snapshot of the contents - dirty_materials = impl->dirty_materials; - impl->dirty_materials.clear(); - } + auto read_lock = std::shared_lock(impl->textures_mutex); + auto dirty_textures = std::vector(impl->dirty_textures); - auto gpu_materials_bytes_size = all_materials_count * sizeof(GPU::Material); - auto dirty_material_count = dirty_materials.size(); - auto dirty_materials_size_bytes = dirty_materials.size() * sizeof(GPU::Material); + read_lock.unlock(); + auto write_lock = std::unique_lock(impl->textures_mutex); + impl->dirty_textures.clear(); - auto materials_buffer = vuk::Value{}; - bool rebuild_materials = false; - if (gpu_materials_bytes_size > impl->materials_buffer.data_size()) { - if (impl->materials_buffer.id() != BufferID::Invalid) { - impl->device->wait(); - impl->device->destroy(impl->materials_buffer.id()); - } - - impl->materials_buffer = Buffer::create(*impl->device, gpu_materials_bytes_size, vuk::MemoryUsage::eGPUonly).value(); - materials_buffer = impl->materials_buffer.acquire(*impl->device, "materials buffer", vuk::eNone); - vuk::fill(materials_buffer, ~0_u32); - rebuild_materials = true; - } else if (impl->materials_buffer) { - materials_buffer = impl->materials_buffer.acquire(*impl->device, "materials buffer", vuk::eNone); - } - - auto &transfer_man = impl->device->transfer_man(); - if (rebuild_materials) { - auto _ = std::shared_lock(impl->registry_mutex); - auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, gpu_materials_bytes_size); - auto *dst_material_ptr = reinterpret_cast(upload_buffer->mapped_ptr); - - // All loaded materials - auto all_materials = impl->materials.slots_unsafe(); - for (auto &dirty_material : all_materials) { - auto gpu_material = to_gpu_material(&dirty_material); - std::memcpy(dst_material_ptr, &gpu_material, sizeof(GPU::Material)); - dst_material_ptr++; - } - - materials_buffer = transfer_man.upload_staging(std::move(upload_buffer), std::move(materials_buffer)); - } 
else if (dirty_material_count != 0) { - auto upload_offsets = std::vector(dirty_material_count); - auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, dirty_materials_size_bytes); - auto *dst_material_ptr = reinterpret_cast(upload_buffer->mapped_ptr); - for (const auto &[dirty_material_id, offset] : std::views::zip(dirty_materials, upload_offsets)) { - auto index = SlotMap_decode_id(dirty_material_id).index; - auto *material = this->get_material(dirty_material_id); - auto gpu_material = to_gpu_material(material); - - std::memcpy(dst_material_ptr, &gpu_material, sizeof(GPU::Material)); - offset = index * sizeof(GPU::Material); - dst_material_ptr++; - } - - auto update_materials_pass = vuk::make_pass( - "update materials", - [upload_offsets = std::move( - upload_offsets - )](vuk::CommandBuffer &cmd_list, VUK_BA(vuk::Access::eTransferRead) src_buffer, VUK_BA(vuk::Access::eTransferWrite) dst_buffer) { - for (usize i = 0; i < upload_offsets.size(); i++) { - auto offset = upload_offsets[i]; - auto src_subrange = src_buffer->subrange(i * sizeof(GPU::Material), sizeof(GPU::Material)); - auto dst_subrange = dst_buffer->subrange(offset, sizeof(GPU::Material)); - cmd_list.copy_buffer(src_subrange, dst_subrange); - } + return dirty_textures; +} - return dst_buffer; - } - ); +auto AssetManager::set_material_dirty(MaterialID material_id) -> void { + ZoneScoped; - materials_buffer = update_materials_pass(std::move(upload_buffer), std::move(materials_buffer)); - } else { - return materials_buffer; + auto read_lock = std::shared_lock(impl->materials_mutex); + if (std::ranges::find(impl->dirty_materials, material_id) != impl->dirty_materials.end()) { + return; } - impl->device->commit_descriptor_set(impl->materials_descriptor_set); - return materials_buffer; + read_lock.unlock(); + auto write_lock = std::unique_lock(impl->materials_mutex); + impl->dirty_materials.emplace_back(material_id); } -auto AssetManager::get_materials_descriptor_set() -> 
vuk::PersistentDescriptorSet * { +auto AssetManager::get_dirty_material_ids() -> std::vector { ZoneScoped; - return &impl->materials_descriptor_set; + auto read_lock = std::shared_lock(impl->materials_mutex); + auto dirty_materials = std::vector(impl->dirty_materials); + + read_lock.unlock(); + auto write_lock = std::unique_lock(impl->materials_mutex); + impl->dirty_materials.clear(); + + return dirty_materials; } } // namespace lr diff --git a/Lorr/Engine/Asset/Asset.hh b/Lorr/Engine/Asset/Asset.hh index ed82458f..cbb1690f 100755 --- a/Lorr/Engine/Asset/Asset.hh +++ b/Lorr/Engine/Asset/Asset.hh @@ -118,8 +118,10 @@ struct AssetManager : Handle { auto get_scene(const UUID &uuid) -> Scene *; auto get_scene(SceneID scene_id) -> Scene *; + auto set_texture_dirty(TextureID texture_id) -> void; + auto get_dirty_texture_ids() -> std::vector; + auto set_material_dirty(MaterialID material_id) -> void; - auto get_materials_buffer() -> vuk::Value; - auto get_materials_descriptor_set() -> vuk::PersistentDescriptorSet *; + auto get_dirty_material_ids() -> std::vector; }; } // namespace lr diff --git a/Lorr/Engine/Core/JobManager.cc b/Lorr/Engine/Core/JobManager.cc index 60f9fd8b..f5258a29 100644 --- a/Lorr/Engine/Core/JobManager.cc +++ b/Lorr/Engine/Core/JobManager.cc @@ -79,8 +79,10 @@ auto JobManager::worker(this JobManager &self, u32 id) -> void { memory::ScopedStack stack; this_thread_worker.id = id; - os::set_thread_name(stack.format("Worker {}", id)); - fmtlog::setThreadName(stack.format_char("Worker {}", id)); + + const auto *thread_name = stack.format_char("Worker {}", id); + os::set_thread_name(thread_name); + fmtlog::setThreadName(thread_name); LS_DEFER() { this_thread_worker.id = ~0_u32; diff --git a/Lorr/Engine/Graphics/Vulkan.hh b/Lorr/Engine/Graphics/Vulkan.hh index b3b221d3..1f89cc50 100644 --- a/Lorr/Engine/Graphics/Vulkan.hh +++ b/Lorr/Engine/Graphics/Vulkan.hh @@ -22,7 +22,7 @@ enum class PipelineID : u64 { Invalid = ~0_u64 }; struct Device; struct Buffer { - 
static auto create(Device &, u64 size, vuk::MemoryUsage memory_usage = vuk::MemoryUsage::eGPUonly, LR_THISCALL) + [[nodiscard]] static auto create(Device &, u64 size, vuk::MemoryUsage memory_usage = vuk::MemoryUsage::eGPUonly, LR_THISCALL) -> std::expected; auto data_size() const -> u64; @@ -30,6 +30,10 @@ struct Buffer { auto host_ptr() const -> u8 *; auto id() const -> BufferID; + // if new_size is smaller than current size, this will do nothing + [[nodiscard]] auto resize(Device &, u64 new_size, vuk::MemoryUsage memory_usage = vuk::MemoryUsage::eGPUonly, LR_THISCALL) + -> std::expected; + auto acquire(Device &, vuk::Name name, vuk::Access access, u64 offset = 0, u64 size = ~0_u64) -> vuk::Value; auto discard(Device &, vuk::Name name, u64 offset = 0, u64 size = ~0_u64) -> vuk::Value; auto subrange(Device &, u64 offset = 0, u64 size = ~0_u64) -> vuk::Buffer; diff --git a/Lorr/Engine/Graphics/Vulkan/Buffer.cc b/Lorr/Engine/Graphics/Vulkan/Buffer.cc index 4022b0ed..4f933da8 100644 --- a/Lorr/Engine/Graphics/Vulkan/Buffer.cc +++ b/Lorr/Engine/Graphics/Vulkan/Buffer.cc @@ -43,6 +43,19 @@ auto Buffer::id() const -> BufferID { return id_; } +auto Buffer::resize(Device &device, u64 new_size, vuk::MemoryUsage memory_usage, LR_CALLSTACK) -> std::expected { + if (new_size > this->data_size()) { + if (this->id() != BufferID::Invalid) { + device.wait(); + device.destroy(this->id()); + } + + return Buffer::create(device, new_size, memory_usage, LOC); + } + + return *this; +} + auto Buffer::acquire(Device &device, vuk::Name name, vuk::Access access, u64 offset, u64 size) -> vuk::Value { ZoneScoped; diff --git a/Lorr/Engine/Resources/shaders/assert.slang b/Lorr/Engine/Resources/shaders/assert.slang new file mode 100644 index 00000000..d1c6307c --- /dev/null +++ b/Lorr/Engine/Resources/shaders/assert.slang @@ -0,0 +1,7 @@ +#ifdef ENABLE_ASSERTIONS +#define assert_msg(x, msg, ...) 
do { if (!bool(x)) { printf(msg, __VA_ARGS__); } } while(false) +#define assert(x) assert_msg(x, "Shader aborted at " __FILE__ ":%d", __LINE__) +#else +#define assert_msg(...) +#define assert(...) +#endif diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang index 119b9083..f5892339 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang @@ -62,6 +62,6 @@ func cs_main( if (visible) { let workgroup_count = (mesh_lod.meshlet_count + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; let index = std::atomic_add(params.cull_meshlets_cmd[0].x, workgroup_count, std::memory_order_relaxed); - params.visible_mesh_instances_indices[index] = mesh_instance_index; + // params.visible_mesh_instances_indices[index] = mesh_instance_index; } -} \ No newline at end of file +} diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index f5aa2d46..92d2186b 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -4,6 +4,8 @@ import scene; import cull; import debug_drawer; +#include + struct ShaderParameters { ConstantBuffer camera; StructuredBuffer meshlet_instances; @@ -72,6 +74,7 @@ func cs_main( if (visible) { let index = std::atomic_add(params.cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + assert(index < meshlet_instance_count); params.visible_meshlet_instances_indices[index] = meshlet_instance_index; } -} \ No newline at end of file +} diff --git a/Lorr/Engine/Resources/shaders/passes/debug.slang b/Lorr/Engine/Resources/shaders/passes/debug.slang index 165fe4fd..886b5353 100644 --- a/Lorr/Engine/Resources/shaders/passes/debug.slang +++ b/Lorr/Engine/Resources/shaders/passes/debug.slang @@ -12,22 +12,22 @@ ParameterBlock params; func draw_aabb(u32 vertex_index, u32 
instance_index, out f32x4 position, out f32x3 color, out DebugDrawCoord coord) { let aabb = params.debug_aabb_draws[instance_index]; - static let offsets = f32x3[24]( - // bottom - f32x3(-0.5, -0.5, -0.5), f32x3( 0.5, -0.5, -0.5), - f32x3(-0.5, -0.5, -0.5), f32x3(-0.5, 0.5, -0.5), - f32x3(-0.5, 0.5, -0.5), f32x3( 0.5, 0.5, -0.5), - f32x3( 0.5, -0.5, -0.5), f32x3( 0.5, 0.5, -0.5), + static let offsets = f32x3[]( // top + f32x3(-0.5, 0.5, 0.5), f32x3( 0.5, 0.5, 0.5), + f32x3( 0.5, 0.5, 0.5), f32x3( 0.5, 0.5, -0.5), + f32x3( 0.5, 0.5, -0.5), f32x3(-0.5, 0.5, -0.5), + f32x3(-0.5, 0.5, -0.5), f32x3(-0.5, 0.5, 0.5), + // bottom f32x3(-0.5, -0.5, 0.5), f32x3( 0.5, -0.5, 0.5), - f32x3(-0.5, -0.5, 0.5), f32x3(-0.5, 0.5, 0.5), - f32x3(-0.5, 0.5, 0.5), f32x3( 0.5, 0.5, 0.5), - f32x3( 0.5, -0.5, 0.5), f32x3( 0.5, 0.5, 0.5), - // connections + f32x3( 0.5, -0.5, 0.5), f32x3( 0.5, -0.5, -0.5), + f32x3( 0.5, -0.5, -0.5), f32x3(-0.5, -0.5, -0.5), f32x3(-0.5, -0.5, -0.5), f32x3(-0.5, -0.5, 0.5), - f32x3( 0.5, -0.5, -0.5), f32x3( 0.5, -0.5, 0.5), - f32x3(-0.5, 0.5, -0.5), f32x3(-0.5, 0.5, 0.5), - f32x3( 0.5, 0.5, -0.5), f32x3( 0.5, 0.5, 0.5) + // connections + f32x3(-0.5, 0.5, 0.5), f32x3(-0.5, -0.5, 0.5), + f32x3( 0.5, 0.5, 0.5), f32x3( 0.5, -0.5, 0.5), + f32x3( 0.5, 0.5, -0.5), f32x3( 0.5, -0.5, -0.5), + f32x3(-0.5, 0.5, -0.5), f32x3(-0.5, -0.5, -0.5), ); position = f32x4(offsets[vertex_index] * aabb.size, 1.0); position = position + f32x4(aabb.position, 0.0); diff --git a/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang b/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang index 50841984..c4d46e30 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang @@ -38,7 +38,7 @@ func cs_main( for (f32 i = 0.0; i < STEP_COUNT; i += 1.0) { ray_pos += sun_dir * distance_per_step; let ray_altitude = length(ray_pos) - params.atmosphere.planet_radius; - const let medium = 
MediumScattering(params.atmosphere, ray_altitude); + let medium = MediumScattering(params.atmosphere, ray_altitude); optical_depth += medium.extinction_sum * distance_per_step; } diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index f388af73..63765fc4 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -99,7 +99,7 @@ func fs_main(VertexOutput input) -> FragmentOutput { let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; let mesh = params.meshes[mesh_instance.mesh_index]; - let material = params.materials[mesh.material_index]; + let material = params.materials[mesh_instance.material_index]; let transform = params.transforms[mesh_instance.transform_index]; let mesh_lod = mesh.lods[mesh_instance.lod_index]; let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index 38f3fc4d..b67f429e 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -11,7 +11,7 @@ struct ShaderParameters { StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; - StructuredBuffer materials; + StructuredBuffer materials; StorageImage2D overdraw; }; ParameterBlock params; @@ -47,7 +47,7 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { output.tex_coord = tex_coord; output.meshlet_instance_index = vis.meshlet_instance_index; output.triangle_index = vis.triangle_index / 3; - output.material_index = mesh.material_index; + output.material_index = mesh_instance.material_index; return output; } @@ -55,7 +55,7 @@ func vs_main(u32 vertex_index : 
SV_VertexID) -> VertexOutput { [[shader("fragment")]] func fs_main(VertexOutput input) -> u32 { let material = params.materials[input.material_index]; - if (material.albedo_image_index != ~0u) { + if (material.flags & MaterialFlag::HasAlbedoImage) { UVGradient grad; grad.uv = input.tex_coord; grad.ddx = ddx(input.tex_coord); diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index d1216c1f..9a5e3abd 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -3,6 +3,8 @@ module scene; import std; import gpu; +#include + public const static f32 CAMERA_SCALE_UNIT = 0.01; public const static f32 INV_CAMERA_SCALE_UNIT = 1.0 / CAMERA_SCALE_UNIT; public const static f32 PLANET_RADIUS_OFFSET = 0.001; @@ -197,7 +199,10 @@ public struct Meshlet { // Takes a local triange index and returns an index to index buffer. public func index(in MeshLOD mesh_lod, u32 i) -> u32 { + assert(this.local_triangle_index_offset + i < mesh_lod.local_triangle_indices_count); let local_triangle_index = u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i]); + + assert(this.indirect_vertex_index_offset + local_triangle_index < mesh_lod.indirect_vertex_indices_count); return mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_index]; } @@ -274,7 +279,11 @@ public struct MeshLOD { public Bounds *meshlet_bounds = nullptr; public u8 *local_triangle_indices = nullptr; public u32 *indirect_vertex_indices = nullptr; + public u32 indices_count = 0; public u32 meshlet_count = 0; + public u32 meshlet_bounds_count = 0; + public u32 local_triangle_indices_count = 0; + public u32 indirect_vertex_indices_count = 0; public f32 error = 0.0; }; @@ -286,7 +295,7 @@ public struct Mesh { public f32x3 *vertex_positions = nullptr; public f32x3 *vertex_normals = nullptr; public f32x2 *texture_coords = nullptr; - public u32 material_index = 0; + public u32 _padding = 0; public 
u32 lod_count = 0; public MeshLOD lods[MESH_MAX_LODS] = {}; public Bounds bounds = {}; diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 81de7e3f..04d21e17 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -180,12 +180,18 @@ struct MeshLOD { alignas(8) u64 meshlet_bounds = 0; alignas(8) u64 local_triangle_indices = 0; alignas(8) u64 indirect_vertex_indices = 0; + + alignas(4) u32 indices_count = 0; alignas(4) u32 meshlet_count = 0; + alignas(4) u32 meshlet_bounds_count = 0; + alignas(4) u32 local_triangle_indices_count = 0; + alignas(4) u32 indirect_vertex_indices_count = 0; + alignas(4) f32 error = 0.0f; }; struct Mesh { - constexpr static auto MAX_LODS = 8_sz; + constexpr static auto MAX_LODS = 1_sz; alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index dbcbea45..c7b1cb8c 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -519,8 +519,6 @@ auto Scene::find_entity(this Scene &self, u32 transform_index) -> flecs::entity auto Scene::render(this Scene &self, SceneRenderer &renderer, SceneRenderInfo &info) -> vuk::Value { ZoneScoped; - auto &app = Application::get(); - // clang-format off auto camera_query = self.get_world() .query_builder() @@ -602,28 +600,15 @@ auto Scene::render(this Scene &self, SceneRenderer &renderer, SceneRenderInfo &i } }); - ls::option composed_scene = ls::nullopt; - if (self.models_dirty) { - memory::ScopedStack stack; - self.models_dirty = false; - - auto compose_info = self.compose(); - composed_scene.emplace(renderer.compose(compose_info)); - } + auto prepared_frame = self.prepare_frame(renderer); - info.materials_descriptor_set = app.asset_man.get_materials_descriptor_set(); - info.materials_buffer = app.asset_man.get_materials_buffer(); info.sun = sun_data; info.atmosphere = atmos_data; info.camera = active_camera_data; info.histogram_info = histogram_data; 
info.cull_flags = self.cull_flags; - info.dirty_transform_ids = self.dirty_transforms; - info.transforms = self.transforms.slots_unsafe(); - auto rendered_attachment = renderer.render(info, composed_scene); - self.dirty_transforms.clear(); - return rendered_attachment; + return renderer.render(info, prepared_frame); } auto Scene::tick(this Scene &self, f32 delta_time) -> bool { @@ -722,7 +707,7 @@ auto Scene::get_cull_flags(this Scene &self) -> GPU::CullFlags & { return self.cull_flags; } -auto Scene::compose(this Scene &self) -> SceneComposeInfo { +auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> PreparedFrame { ZoneScoped; auto &app = Application::get(); @@ -731,43 +716,110 @@ auto Scene::compose(this Scene &self) -> SceneComposeInfo { auto gpu_mesh_instances = std::vector(); auto gpu_meshlet_instances = std::vector(); - for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { - auto *model = app.asset_man.get_model(rendering_mesh.n0); - const auto &mesh = model->meshes[rendering_mesh.n1]; - - for (auto primitive_index : mesh.primitive_indices) { - const auto &primitive = model->primitives[primitive_index]; - const auto &gpu_mesh = model->gpu_meshes[primitive_index]; - auto mesh_index = static_cast(gpu_meshes.size()); - gpu_meshes.emplace_back(gpu_mesh); - - // ── INSTANCING ────────────────────────────────────────────────── - for (const auto transform_id : transform_ids) { - auto mesh_instance_index = static_cast(gpu_mesh_instances.size()); - - auto lod_index = 0; - const auto &lod = gpu_mesh.lods[lod_index]; - - auto &mesh_instance = gpu_mesh_instances.emplace_back(); - mesh_instance.mesh_index = mesh_index; - mesh_instance.lod_index = lod_index; - mesh_instance.material_index = SlotMap_decode_id(primitive.material_id).index; - mesh_instance.transform_index = SlotMap_decode_id(transform_id).index; - - for (u32 meshlet_index = 0; meshlet_index < lod.meshlet_count; meshlet_index++) { - auto &meshlet_instance = 
gpu_meshlet_instances.emplace_back(); - meshlet_instance.mesh_instance_index = mesh_instance_index; - meshlet_instance.meshlet_index = meshlet_index; + if (self.models_dirty) { + for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { + auto *model = app.asset_man.get_model(rendering_mesh.n0); + const auto &mesh = model->meshes[rendering_mesh.n1]; + + for (auto primitive_index : mesh.primitive_indices) { + const auto &primitive = model->primitives[primitive_index]; + const auto &gpu_mesh = model->gpu_meshes[primitive_index]; + auto mesh_index = static_cast(gpu_meshes.size()); + gpu_meshes.emplace_back(gpu_mesh); + + // ── INSTANCING ────────────────────────────────────────────────── + for (const auto transform_id : transform_ids) { + auto mesh_instance_index = static_cast(gpu_mesh_instances.size()); + + auto lod_index = 0; + const auto &lod = gpu_mesh.lods[lod_index]; + + auto &mesh_instance = gpu_mesh_instances.emplace_back(); + mesh_instance.mesh_index = mesh_index; + mesh_instance.lod_index = lod_index; + mesh_instance.material_index = SlotMap_decode_id(primitive.material_id).index; + mesh_instance.transform_index = SlotMap_decode_id(transform_id).index; + + for (u32 meshlet_index = 0; meshlet_index < lod.meshlet_count; meshlet_index++) { + auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); + meshlet_instance.mesh_instance_index = mesh_instance_index; + meshlet_instance.meshlet_index = meshlet_index; + } } } } } - return SceneComposeInfo{ - .gpu_meshes = std::move(gpu_meshes), - .gpu_mesh_instances = std::move(gpu_mesh_instances), - .gpu_meshlet_instances = std::move(gpu_meshlet_instances), + auto dirty_texture_ids = app.asset_man.get_dirty_texture_ids(); + auto dirty_texture_indices = std::vector(dirty_texture_ids.size()); + auto dirty_textures = std::vector>(dirty_texture_ids.size()); + for (const auto &[texture_pair, index, id] : std::views::zip(dirty_textures, dirty_texture_indices, dirty_texture_ids)) { + auto *texture = 
app.asset_man.get_texture(id); + texture_pair = ls::pair(texture->image_view.id(), texture->sampler.id()); + index = SlotMap_decode_id(id).index; + } + + auto uuid_to_index = [&](const UUID &uuid) -> ls::option { + if (!app.asset_man.is_texture_loaded(uuid)) { + return ls::nullopt; + } + + auto *texture_asset = app.asset_man.get_asset(uuid); + + return SlotMap_decode_id(texture_asset->texture_id).index; + }; + + auto dirty_material_ids = app.asset_man.get_dirty_material_ids(); + auto gpu_materials = std::vector(dirty_material_ids.size()); + auto dirty_material_indices = std::vector(dirty_material_ids.size()); + for (const auto &[gpu_material, index, id] : std::views::zip(gpu_materials, dirty_material_indices, dirty_material_ids)) { + const auto *material = app.asset_man.get_material(id); + auto albedo_image_index = uuid_to_index(material->albedo_texture); + auto normal_image_index = uuid_to_index(material->normal_texture); + auto emissive_image_index = uuid_to_index(material->emissive_texture); + auto metallic_roughness_image_index = uuid_to_index(material->metallic_roughness_texture); + auto occlusion_image_index = uuid_to_index(material->occlusion_texture); + + auto flags = GPU::MaterialFlag::None; + flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; + flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; + flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; + flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; + flags |= occlusion_image_index.has_value() ? 
GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; + + gpu_material.albedo_color = material->albedo_color; + gpu_material.emissive_color = material->emissive_color; + gpu_material.roughness_factor = material->roughness_factor; + gpu_material.metallic_factor = material->metallic_factor; + gpu_material.alpha_cutoff = material->alpha_cutoff; + gpu_material.flags = flags; + gpu_material.albedo_image_index = albedo_image_index.value_or(~0_u32); + gpu_material.normal_image_index = normal_image_index.value_or(~0_u32); + gpu_material.emissive_image_index = emissive_image_index.value_or(~0_u32); + gpu_material.metallic_roughness_image_index = metallic_roughness_image_index.value_or(~0_u32); + gpu_material.occlusion_image_index = occlusion_image_index.value_or(~0_u32); + + index = SlotMap_decode_id(id).index; + } + + auto prepare_info = FramePrepareInfo{ + .mesh_instance_count = static_cast(self.rendering_meshes_map.size()), + .dirty_texture_indices = dirty_texture_indices, + .dirty_textures = dirty_textures, + .dirty_transform_ids = self.dirty_transforms, + .gpu_transforms = self.transforms.slots_unsafe(), + .dirty_material_indices = dirty_material_indices, + .gpu_materials = gpu_materials, + .gpu_meshes = gpu_meshes, + .gpu_mesh_instances = gpu_mesh_instances, + .gpu_meshlet_instances = gpu_meshlet_instances, }; + auto prepared_frame = renderer.prepare_frame(prepare_info); + + self.models_dirty = false; + self.dirty_transforms.clear(); + + return prepared_frame; } auto Scene::add_transform(this Scene &self, flecs::entity entity) -> GPU::TransformID { diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index 41f74fb8..76b69eb3 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -92,7 +92,7 @@ public: auto get_cull_flags(this Scene &) -> GPU::CullFlags &; private: - auto compose(this Scene &) -> SceneComposeInfo; + auto prepare_frame(this Scene &, SceneRenderer &renderer) -> PreparedFrame; auto add_transform(this Scene &, 
flecs::entity entity) -> GPU::TransformID; auto remove_transform(this Scene &, flecs::entity entity) -> void; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index b7c9bc18..91bc6110 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -28,11 +28,51 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi auto &asset_man = app.asset_man; auto &transfer_man = app.device.transfer_man(); auto shaders_root = asset_man.asset_root_path(AssetType::Shader); - auto *materials_set = asset_man.get_materials_descriptor_set(); + + constexpr auto MATERIAL_COUNT = 1024_sz; + BindlessDescriptorInfo bindless_set_info[] = { + { .binding = 0, .type = vuk::DescriptorType::eSampler, .descriptor_count = MATERIAL_COUNT }, + { .binding = 1, .type = vuk::DescriptorType::eSampledImage, .descriptor_count = MATERIAL_COUNT }, + }; + self.materials_descriptor_set = self.device->create_persistent_descriptor_set(bindless_set_info, 1).release(); + auto invalid_image_info = ImageInfo{ + .format = vuk::Format::eR8G8B8A8Srgb, + .usage = vuk::ImageUsageFlagBits::eSampled, + .type = vuk::ImageType::e2D, + .extent = { .width = 1, .height = 1, .depth = 1 }, + .name = "Invalid", + }; + std::tie(self.invalid_image, self.invalid_image_view) = Image::create_with_view(*self.device, invalid_image_info).value(); + auto invalid_image = self.device->image_view(self.invalid_image_view.id()); + + auto full_white = 0xFFFFFFFF_u32; + transfer_man.wait_on(transfer_man.upload_staging(self.invalid_image_view, &full_white, sizeof(decltype(full_white)))); + + auto invalid_sampler_info = SamplerInfo{ + .min_filter = vuk::Filter::eLinear, + .mag_filter = vuk::Filter::eLinear, + .mipmap_mode = vuk::SamplerMipmapMode::eLinear, + .addr_u = vuk::SamplerAddressMode::eRepeat, + .addr_v = vuk::SamplerAddressMode::eRepeat, + .addr_w = vuk::SamplerAddressMode::eRepeat, + .compare_op = vuk::CompareOp::eNever, + }; + auto 
invalid_sampler = Sampler::create(*self.device, invalid_sampler_info).value(); + auto invalid_sampler_handle = self.device->sampler(invalid_sampler.id()); + + for (auto i = 0_sz; i < MATERIAL_COUNT; i++) { + self.materials_descriptor_set.update_sampler(0, i, *invalid_sampler_handle); + self.materials_descriptor_set.update_sampled_image(1, i, *invalid_image, vuk::ImageLayout::eShaderReadOnlyOptimal); + } + self.device->commit_descriptor_set(self.materials_descriptor_set); + self.device->destroy(invalid_sampler.id()); // ── EDITOR ────────────────────────────────────────────────────────── auto default_slang_session = self.device->new_slang_session({ .definitions = { +#ifdef LS_DEBUG + { "ENABLE_ASSERTIONS", "1" }, +#endif // DEBUG { "CULLING_MESH_COUNT", "64" }, { "CULLING_MESHLET_COUNT", std::to_string(Model::MAX_MESHLET_INDICES) }, { "CULLING_TRIANGLE_COUNT", std::to_string(Model::MAX_MESHLET_PRIMITIVES) }, @@ -123,7 +163,7 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi .module_name = "passes.visbuffer_encode", .entry_points = { "vs_main", "fs_main" }, }; - Pipeline::create(*self.device, default_slang_session, vis_encode_pipeline_info, *materials_set).value(); + Pipeline::create(*self.device, default_slang_session, vis_encode_pipeline_info, self.materials_descriptor_set).value(); auto vis_clear_pipeline_info = PipelineCompileInfo{ .module_name = "passes.visbuffer_clear", @@ -135,7 +175,7 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi .module_name = "passes.visbuffer_decode", .entry_points = { "vs_main", "fs_main" }, }; - Pipeline::create(*self.device, default_slang_session, vis_decode_pipeline_info, *materials_set).value(); + Pipeline::create(*self.device, default_slang_session, vis_decode_pipeline_info, self.materials_descriptor_set).value(); // ── PBR ───────────────────────────────────────────────────────────── auto pbr_basic_pipeline_info = PipelineCompileInfo{ @@ -234,178 +274,153 @@ 
auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi transfer_man.wait_on(std::move(multiscatter_lut_attachment)); self.exposure_buffer = Buffer::create(*self.device, sizeof(GPU::HistogramLuminance)).value(); + vuk::fill(vuk::acquire_buf("exposure", *self.device->buffer(self.exposure_buffer.id()), vuk::eNone), 0); } -auto SceneRenderer::compose(this SceneRenderer &self, SceneComposeInfo &compose_info) -> ComposedScene { +auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &info) -> PreparedFrame { ZoneScoped; auto &transfer_man = self.device->transfer_man(); + auto prepared_frame = PreparedFrame{}; - // IMPORTANT: Only wait when buffer is being resized!!! - // We can still copy into gpu buffer if it has enough space. + if (!info.dirty_transform_ids.empty()) { + auto rebuild_transforms = !self.materials_buffer || self.transforms_buffer.data_size() <= info.gpu_transforms.size_bytes(); + self.transforms_buffer = self.transforms_buffer.resize(*self.device, info.gpu_transforms.size_bytes()).value(); - if (ls::size_bytes(compose_info.gpu_meshlet_instances) > self.meshlet_instances_buffer.data_size()) { - if (self.meshlet_instances_buffer) { - self.device->wait(); - self.device->destroy(self.meshlet_instances_buffer.id()); - } + if (rebuild_transforms) { + // If we resize buffer, we need to refill it again, so individual uploads are not required. + prepared_frame.transforms_buffer = transfer_man.upload_staging(info.gpu_transforms, self.transforms_buffer); + } else { + // Buffer is not resized, upload individual transforms. 
+ + auto dirty_transforms_count = info.dirty_transform_ids.size(); + auto dirty_transforms_size_bytes = dirty_transforms_count * sizeof(GPU::Transforms); + auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUtoGPU, dirty_transforms_size_bytes); + auto *dst_transform_ptr = reinterpret_cast(upload_buffer->mapped_ptr); + auto upload_offsets = std::vector(dirty_transforms_count); + + for (const auto &[dirty_transform_id, offset] : std::views::zip(info.dirty_transform_ids, upload_offsets)) { + auto index = SlotMap_decode_id(dirty_transform_id).index; + const auto &transform = info.gpu_transforms[index]; + std::memcpy(dst_transform_ptr, &transform, sizeof(GPU::Transforms)); + offset = index * sizeof(GPU::Transforms); + dst_transform_ptr++; + } - self.meshlet_instances_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshlet_instances)).value(); - } + auto update_transforms_pass = vuk::make_pass( + "update scene transforms", + [upload_offsets = std::move(upload_offsets)]( + vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::Access::eTransferRead) src_buffer, + VUK_BA(vuk::Access::eTransferWrite) dst_buffer + ) { + for (usize i = 0; i < upload_offsets.size(); i++) { + auto offset = upload_offsets[i]; + auto src_subrange = src_buffer->subrange(i * sizeof(GPU::Transforms), sizeof(GPU::Transforms)); + auto dst_subrange = dst_buffer->subrange(offset, sizeof(GPU::Transforms)); + cmd_list.copy_buffer(src_subrange, dst_subrange); + } + + return dst_buffer; + } + ); - if (ls::size_bytes(compose_info.gpu_mesh_instances) > self.mesh_instances_buffer.data_size()) { - if (self.mesh_instances_buffer) { - self.device->wait(); - self.device->destroy(self.mesh_instances_buffer.id()); + prepared_frame.transforms_buffer = self.transforms_buffer.acquire(*self.device, "transforms", vuk::Access::eMemoryRead); + prepared_frame.transforms_buffer = update_transforms_pass(std::move(upload_buffer), std::move(prepared_frame.transforms_buffer)); } - - 
self.mesh_instances_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_mesh_instances)).value(); + } else if (self.transforms_buffer) { + prepared_frame.transforms_buffer = self.transforms_buffer.acquire(*self.device, "transforms", vuk::Access::eMemoryRead); } - if (ls::size_bytes(compose_info.gpu_meshes) > self.meshes_buffer.data_size()) { - if (self.meshes_buffer) { - self.device->wait(); - self.device->destroy(self.meshes_buffer.id()); + if (!info.dirty_texture_indices.empty()) { + for (const auto &[texture_pair, index] : std::views::zip(info.dirty_textures, info.dirty_texture_indices)) { + auto image_view = self.device->image_view(texture_pair.n0); + auto sampler = self.device->sampler(texture_pair.n1); + self.materials_descriptor_set.update_sampler(0, index, sampler.value()); + self.materials_descriptor_set.update_sampled_image(1, index, image_view.value(), vuk::ImageLayout::eShaderReadOnlyOptimal); } - self.meshes_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshes)).value(); + self.device->commit_descriptor_set(self.materials_descriptor_set); } - auto meshlet_instances_buffer = vuk::Value{}; - if (!compose_info.gpu_meshlet_instances.empty()) { - meshlet_instances_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_meshlet_instances), self.meshlet_instances_buffer); - } + if (!info.dirty_material_indices.empty()) { + auto rebuild_materials = !self.materials_buffer || self.materials_buffer.data_size() <= info.gpu_materials.size_bytes(); + self.materials_buffer = self.materials_buffer.resize(*self.device, info.gpu_materials.size_bytes()).value(); - auto mesh_instances_buffer = vuk::Value{}; - if (!compose_info.gpu_mesh_instances.empty()) { - mesh_instances_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_mesh_instances), self.mesh_instances_buffer); - } + if (rebuild_materials) { + prepared_frame.materials_buffer = transfer_man.upload_staging(info.gpu_materials, self.materials_buffer); + } 
else { + // TODO: Literally repeating code, find a solution to this + auto dirty_materials_count = info.dirty_material_indices.size(); + auto dirty_materials_size_bytes = dirty_materials_count * sizeof(GPU::Material); + auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUtoGPU, dirty_materials_size_bytes); + auto *dst_materials_ptr = upload_buffer->mapped_ptr; + auto upload_offsets = std::vector(dirty_materials_count); + + for (const auto &[dirty_material, index, offset] : std::views::zip(info.gpu_materials, info.dirty_material_indices, upload_offsets)) { + std::memcpy(dst_materials_ptr, &dirty_material, sizeof(GPU::Material)); + offset = index * sizeof(GPU::Material); + dst_materials_ptr++; + } - auto meshes_buffer = vuk::Value{}; - if (!compose_info.gpu_meshes.empty()) { - meshes_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_meshes), self.meshes_buffer); - } + auto update_materials_pass = vuk::make_pass( + "update scene materials", + [upload_offsets = std::move(upload_offsets)]( + vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::Access::eTransferRead) src_buffer, + VUK_BA(vuk::Access::eTransferWrite) dst_buffer + ) { + for (usize i = 0; i < upload_offsets.size(); i++) { + auto offset = upload_offsets[i]; + auto src_subrange = src_buffer->subrange(i * sizeof(GPU::Material), sizeof(GPU::Material)); + auto dst_subrange = dst_buffer->subrange(offset, sizeof(GPU::Material)); + cmd_list.copy_buffer(src_subrange, dst_subrange); + } + + return dst_buffer; + } + ); - if (self.exposure_buffer) { - vuk::fill(vuk::acquire_buf("exposure", *self.device->buffer(self.exposure_buffer.id()), vuk::eNone), 0); + prepared_frame.materials_buffer = self.materials_buffer.acquire(*self.device, "materials", vuk::eMemoryRead); + prepared_frame.materials_buffer = update_materials_pass(std::move(upload_buffer), std::move(prepared_frame.materials_buffer)); + } + } else if (self.materials_buffer) { + prepared_frame.materials_buffer = 
self.materials_buffer.acquire(*self.device, "materials", vuk::eMemoryRead); } - self.meshlet_instance_count = compose_info.gpu_meshlet_instances.size(); - self.mesh_instance_count = compose_info.gpu_mesh_instances.size(); - - return ComposedScene{ - .meshlet_instances_buffer = meshlet_instances_buffer, - .mesh_instances_buffer = mesh_instances_buffer, - .meshes_buffer = meshes_buffer, - }; -} - -auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { - ZoneScoped; - - self.device->wait(); - - self.mesh_instance_count = 0; - - if (self.transforms_buffer) { - self.device->destroy(self.transforms_buffer.id()); - self.transforms_buffer = {}; + if (!info.gpu_meshes.empty()) { + self.meshes_buffer = self.meshes_buffer.resize(*self.device, info.gpu_meshes.size_bytes()).value(); + prepared_frame.meshes_buffer = transfer_man.upload_staging(info.gpu_meshes, self.meshes_buffer); + } else if (self.meshes_buffer) { + prepared_frame.meshes_buffer = self.meshes_buffer.acquire(*self.device, "meshes", vuk::eMemoryRead); } - if (self.meshlet_instances_buffer) { - self.device->destroy(self.meshlet_instances_buffer.id()); - self.meshlet_instances_buffer = {}; - } + if (!info.gpu_mesh_instances.empty()) { + self.mesh_instances_buffer = self.mesh_instances_buffer.resize(*self.device, info.gpu_mesh_instances.size_bytes()).value(); + prepared_frame.mesh_instances_buffer = transfer_man.upload_staging(info.gpu_mesh_instances, self.mesh_instances_buffer); - if (self.mesh_instances_buffer) { - self.device->destroy(self.mesh_instances_buffer.id()); - self.mesh_instances_buffer = {}; + self.mesh_instance_count = info.gpu_mesh_instances.size(); + } else if (self.mesh_instances_buffer) { + prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(*self.device, "mesh instances", vuk::eMemoryRead); } - if (self.meshes_buffer) { - self.device->destroy(self.meshes_buffer.id()); - self.meshes_buffer = {}; - } + if (!info.gpu_meshlet_instances.empty()) { + 
self.meshlet_instances_buffer = self.meshlet_instances_buffer.resize(*self.device, info.gpu_meshlet_instances.size_bytes()).value(); + prepared_frame.meshlet_instances_buffer = transfer_man.upload_staging(info.gpu_meshlet_instances, self.meshlet_instances_buffer); - if (self.hiz_view) { - self.device->destroy(self.hiz_view.id()); - self.hiz_view = {}; + self.meshlet_instance_count = info.gpu_meshlet_instances.size(); + } else if (self.meshlet_instances_buffer) { + prepared_frame.meshlet_instances_buffer = self.meshlet_instances_buffer.acquire(*self.device, "meshlet instances", vuk::eMemoryRead); } - if (self.hiz) { - self.device->destroy(self.hiz.id()); - self.hiz = {}; - } + return prepared_frame; } -auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls::option &composed_scene) - -> vuk::Value { +auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, PreparedFrame &frame) -> vuk::Value { ZoneScoped; auto &transfer_man = self.device->transfer_man(); - // ── ENTITY TRANSFORMS ─────────────────────────────────────────────── - // - // WARN: compose_info.transforms contains _ALL_ transforms!!! - // - bool rebuild_transforms = false; - if (info.transforms.size_bytes() > self.transforms_buffer.data_size()) { - if (self.transforms_buffer.id() != BufferID::Invalid) { - // Device wait here is important, do not remove it. Why? - // We are using ONE transform buffer for all frames, if - // this buffer gets destroyed in current frame, previous - // rendering frame buffer will get corrupt and crash GPU. 
- self.device->wait(); - self.device->destroy(self.transforms_buffer.id()); - } - - self.transforms_buffer = Buffer::create(*self.device, info.transforms.size_bytes(), vuk::MemoryUsage::eGPUonly).value(); - - rebuild_transforms = true; - } - - auto transforms_buffer = self.transforms_buffer.acquire(*self.device, "Transforms Buffer", vuk::Access::eMemoryRead); - - if (rebuild_transforms) { - transforms_buffer = transfer_man.upload_staging(info.transforms, std::move(transforms_buffer)); - } else if (!info.dirty_transform_ids.empty()) { - auto transform_count = info.dirty_transform_ids.size(); - auto new_transforms_size_bytes = transform_count * sizeof(GPU::Transforms); - auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, new_transforms_size_bytes); - auto *dst_transform_ptr = reinterpret_cast(upload_buffer->mapped_ptr); - auto upload_offsets = std::vector(transform_count); - - for (const auto &[dirty_transform_id, offset] : std::views::zip(info.dirty_transform_ids, upload_offsets)) { - auto index = SlotMap_decode_id(dirty_transform_id).index; - const auto &transform = info.transforms[index]; - std::memcpy(dst_transform_ptr, &transform, sizeof(GPU::Transforms)); - offset = index * sizeof(GPU::Transforms); - dst_transform_ptr++; - } - - auto update_transforms_pass = vuk::make_pass( - "update scene transforms", - [upload_offsets = std::move( - upload_offsets - )]( // - vuk::CommandBuffer &cmd_list, - VUK_BA(vuk::Access::eTransferRead) src_buffer, - VUK_BA(vuk::Access::eTransferWrite) dst_buffer - ) { - for (usize i = 0; i < upload_offsets.size(); i++) { - auto offset = upload_offsets[i]; - auto src_subrange = src_buffer->subrange(i * sizeof(GPU::Transforms), sizeof(GPU::Transforms)); - auto dst_subrange = dst_buffer->subrange(offset, sizeof(GPU::Transforms)); - cmd_list.copy_buffer(src_subrange, dst_subrange); - } - - return dst_buffer; - } - ); - - transforms_buffer = update_transforms_pass(std::move(upload_buffer), 
std::move(transforms_buffer)); - } - // ────────────────────────────────────────────────────────────────────── auto final_attachment = vuk::declare_ia( "final", @@ -548,21 +563,11 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: } if (self.mesh_instance_count) { - auto meshlet_instances_buffer = vuk::Value{}; - auto mesh_instances_buffer = vuk::Value{}; - auto meshes_buffer = vuk::Value{}; - if (composed_scene.has_value()) { - meshlet_instances_buffer = std::move(composed_scene->meshlet_instances_buffer); - mesh_instances_buffer = std::move(composed_scene->mesh_instances_buffer); - meshes_buffer = std::move(composed_scene->meshes_buffer); - } else { - meshlet_instances_buffer = self.meshlet_instances_buffer.acquire(*self.device, "meshlet instances", vuk::Access::eNone); - mesh_instances_buffer = self.mesh_instances_buffer.acquire(*self.device, "mesh instances", vuk::Access::eNone); - meshes_buffer = self.meshes_buffer.acquire(*self.device, "meshes", vuk::Access::eNone); - } - - auto materials_buffer = std::move(info.materials_buffer); - auto *materials_set = info.materials_descriptor_set; + auto transforms_buffer = std::move(frame.transforms_buffer); + auto meshes_buffer = std::move(frame.meshes_buffer); + auto mesh_instances_buffer = std::move(frame.mesh_instances_buffer); + auto meshlet_instances_buffer = std::move(frame.meshlet_instances_buffer); + auto materials_buffer = std::move(frame.materials_buffer); // ── CULL MESHES ───────────────────────────────────────────────────── auto vis_cull_meshes_pass = vuk::make_pass( @@ -802,7 +807,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: // ── VISBUFFER ENCODE ──────────────────────────────────────────────── auto vis_encode_pass = vuk::make_pass( "vis encode", - [descriptor_set = materials_set]( + [descriptor_set = &self.materials_descriptor_set]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) triangle_indirect, VUK_BA(vuk::eIndexRead) 
index_buffer, @@ -942,7 +947,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: // ── VISBUFFER DECODE ──────────────────────────────────────────────── auto vis_decode_pass = vuk::make_pass( "vis decode", - [descriptor_set = materials_set]( // + [descriptor_set = &self.materials_descriptor_set]( // vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eFragmentRead) camera, VUK_BA(vuk::eFragmentRead) meshlet_instances, @@ -1456,7 +1461,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: cmd_list // .bind_graphics_pipeline("passes.debug") .set_rasterization({ .polygonMode = vuk::PolygonMode::eFill, .lineWidth = 1.8f }) - .set_primitive_topology(vuk::PrimitiveTopology::eLineStrip) + .set_primitive_topology(vuk::PrimitiveTopology::eLineList) .set_color_blend(dst, vuk::BlendPreset::eOff) .set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor) .set_viewport(0, vuk::Rect2D::framebuffer()) @@ -1489,4 +1494,47 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: return result_attachment; } +auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { + ZoneScoped; + + self.device->wait(); + + self.mesh_instance_count = 0; + + if (self.transforms_buffer) { + self.device->destroy(self.transforms_buffer.id()); + self.transforms_buffer = {}; + } + + if (self.meshlet_instances_buffer) { + self.device->destroy(self.meshlet_instances_buffer.id()); + self.meshlet_instances_buffer = {}; + } + + if (self.mesh_instances_buffer) { + self.device->destroy(self.mesh_instances_buffer.id()); + self.mesh_instances_buffer = {}; + } + + if (self.meshes_buffer) { + self.device->destroy(self.meshes_buffer.id()); + self.meshes_buffer = {}; + } + + if (self.materials_buffer) { + self.device->destroy(self.materials_buffer.id()); + self.materials_buffer = {}; + } + + if (self.hiz_view) { + self.device->destroy(self.hiz_view.id()); + self.hiz_view = {}; + } + + if 
(self.hiz) { + self.device->destroy(self.hiz.id()); + self.hiz = {}; + } +} + } // namespace lr diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 7c5bc174..721b68bd 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -5,25 +5,37 @@ #include "Engine/Scene/GPUScene.hh" namespace lr { -struct SceneComposeInfo { - std::vector gpu_meshes = {}; - std::vector gpu_mesh_instances = {}; - std::vector gpu_meshlet_instances = {}; +struct FramePrepareInfo { + u32 mesh_instance_count = 0; + u32 meshlet_instance_count = 0; + + ls::span dirty_texture_indices = {}; + ls::span> dirty_textures = {}; + + ls::span dirty_transform_ids = {}; + ls::span gpu_transforms = {}; + + ls::span dirty_material_indices = {}; + ls::span gpu_materials = {}; + + ls::span gpu_meshes = {}; + ls::span gpu_mesh_instances = {}; + ls::span gpu_meshlet_instances = {}; }; -struct ComposedScene { - vuk::Value meshlet_instances_buffer = {}; - vuk::Value mesh_instances_buffer = {}; +struct PreparedFrame { + vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = {}; + vuk::Value mesh_instances_buffer = {}; + vuk::Value meshlet_instances_buffer = {}; + vuk::Value materials_buffer = {}; }; struct SceneRenderInfo { vuk::Format format = vuk::Format::eR8G8B8A8Srgb; vuk::Extent3D extent = {}; f32 delta_time = 0.0f; - - vuk::PersistentDescriptorSet *materials_descriptor_set = nullptr; - vuk::Value materials_buffer = {}; + GPU::CullFlags cull_flags = {}; ls::option sun = ls::nullopt; ls::option atmosphere = ls::nullopt; @@ -31,10 +43,6 @@ struct SceneRenderInfo { ls::option picking_texel = ls::nullopt; ls::option histogram_info = ls::nullopt; - GPU::CullFlags cull_flags = {}; - ls::span dirty_transform_ids = {}; - ls::span transforms = {}; - ls::option picked_transform_index = ls::nullopt; }; @@ -44,14 +52,20 @@ struct SceneRenderer { // Scene resources Buffer exposure_buffer = {}; Buffer transforms_buffer = {}; - u32 
meshlet_instance_count = 0; - Buffer meshlet_instances_buffer = {}; + u32 mesh_instance_count = 0; + u32 meshlet_instance_count = 0; Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; + Buffer meshlet_instances_buffer = {}; + + vuk::PersistentDescriptorSet materials_descriptor_set = {}; + Buffer materials_buffer = {}; // Then what are they? // TODO: Per scene sky settings + Image invalid_image = {}; + ImageView invalid_image_view = {}; Image sky_transmittance_lut = {}; ImageView sky_transmittance_lut_view = {}; Image sky_multiscatter_lut = {}; @@ -63,7 +77,6 @@ struct SceneRenderer { ImageView hiz_view = {}; bool debug_lines = false; - f32 debug_heatmap_scale = 5.0; auto init(this SceneRenderer &, Device *device) -> bool; auto destroy(this SceneRenderer &) -> void; @@ -71,9 +84,9 @@ struct SceneRenderer { auto create_persistent_resources(this SceneRenderer &) -> void; // Scene - auto compose(this SceneRenderer &, SceneComposeInfo &compose_info) -> ComposedScene; + auto prepare_frame(this SceneRenderer &, FramePrepareInfo &info) -> PreparedFrame; + auto render(this SceneRenderer &, SceneRenderInfo &render_info, PreparedFrame &frame) -> vuk::Value; auto cleanup(this SceneRenderer &) -> void; - auto render(this SceneRenderer &, SceneRenderInfo &render_info, ls::option &composed_scene) -> vuk::Value; }; } // namespace lr diff --git a/shell.nix b/shell.nix index adebdf46..8799823b 100644 --- a/shell.nix +++ b/shell.nix @@ -1,37 +1,39 @@ let pkgs = import {}; - pkgs-unstable = import {}; -in -pkgs.mkShell.override { stdenv = pkgs-unstable.llvmPackages_20.libcxxStdenv; } { +in +pkgs.mkShell.override { stdenv = pkgs.llvmPackages_20.libcxxStdenv; } { nativeBuildInputs = [ pkgs.cmake pkgs.ninja pkgs.gnumake pkgs.xmake + pkgs. 
- pkgs-unstable.llvmPackages_20.bintools-unwrapped - pkgs-unstable.llvmPackages_20.libcxx.dev - pkgs-unstable.llvmPackages_20.compiler-rt - (pkgs-unstable.llvmPackages_20.clang-tools.override { enableLibcxx = true; }) + pkgs.llvmPackages_20.bintools-unwrapped + pkgs.llvmPackages_20.libcxx.dev + pkgs.llvmPackages_20.compiler-rt + (pkgs.llvmPackages_20.clang-tools.override { enableLibcxx = true; }) pkgs.mold pkgs.pkg-config - pkgs-unstable.python313 - pkgs-unstable.python313Packages.pip - pkgs-unstable.python313Packages.setuptools - pkgs-unstable.python313Packages.wheel + pkgs.python313 + pkgs.python313Packages.pip + pkgs.python313Packages.setuptools + pkgs.python313Packages.wheel pkgs.zlib.dev # for gltfpack - pkgs-unstable.meshoptimizer + pkgs.meshoptimizer # for SDL3 - pkgs-unstable.sdl3 + pkgs.sdl3 ]; shellHook = '' - export LD_LIBRARY_PATH=${pkgs-unstable.llvmPackages_20.libcxx}/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=${pkgs.llvmPackages_20.libcxx}/lib:$LD_LIBRARY_PATH + # slang needs libstdc++ + export LD_LIBRARY_PATH=${pkgs.gcc14.cc.lib}/lib:$LD_LIBRARY_PATH ''; hardeningDisable = [ "all" ]; From fd725d6e7e06cfb5fa4587b663208e99cc79fc47 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 3 Aug 2025 18:44:50 +0300 Subject: [PATCH 12/16] material cpu buffer cast --- Lorr/Engine/Scene/SceneRenderer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 91bc6110..5a1e7949 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -354,7 +354,7 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in auto dirty_materials_count = info.dirty_material_indices.size(); auto dirty_materials_size_bytes = dirty_materials_count * sizeof(GPU::Material); auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUtoGPU, dirty_materials_size_bytes); - auto 
*dst_materials_ptr = upload_buffer->mapped_ptr; + auto *dst_materials_ptr = reinterpret_cast(upload_buffer->mapped_ptr); auto upload_offsets = std::vector(dirty_materials_count); for (const auto &[dirty_material, index, offset] : std::views::zip(info.gpu_materials, info.dirty_material_indices, upload_offsets)) { From dc1d1b83ccf71693d53a54038ffa8e047aacc307 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 3 Aug 2025 20:05:15 +0300 Subject: [PATCH 13/16] wait before updating materials --- Lorr/Engine/Graphics/Vulkan/Pipeline.cc | 4 ++-- Lorr/Engine/Scene/SceneRenderer.cc | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Lorr/Engine/Graphics/Vulkan/Pipeline.cc b/Lorr/Engine/Graphics/Vulkan/Pipeline.cc index f976c2a6..e0557639 100644 --- a/Lorr/Engine/Graphics/Vulkan/Pipeline.cc +++ b/Lorr/Engine/Graphics/Vulkan/Pipeline.cc @@ -13,8 +13,8 @@ auto Pipeline::create( vuk::PipelineBaseCreateInfo create_info = {}; - for (const auto &v : persistent_sets) { - create_info.explicit_set_layouts.push_back(v.set_layout_create_info); + for (const auto &set : persistent_sets) { + create_info.explicit_set_layouts.push_back(set.set_layout_create_info); } auto slang_module = session.load_module({ .module_name = compile_info.module_name, .source = compile_info.shader_source }).value(); diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 5a1e7949..563fae5e 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -341,6 +341,7 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in } self.device->commit_descriptor_set(self.materials_descriptor_set); + self.device->wait(); // I have no idea how to enable UPDATE_AFTER_BIND in vuk } if (!info.dirty_material_indices.empty()) { From 6963cdb7ca232b9c79f37f220243e07110406886 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Wed, 6 Aug 2025 14:28:19 
+0300 Subject: [PATCH 14/16] fix use after free on dirty materials --- Lorr/Engine/Asset/Asset.cc | 2 +- Lorr/Engine/Graphics/Vulkan/Device.cc | 118 +++++++++++++----- Lorr/Engine/Graphics/Vulkan/Pipeline.cc | 4 + Lorr/Engine/Graphics/VulkanDevice.hh | 16 ++- .../shaders/passes/visbuffer_encode.slang | 2 + Lorr/Engine/Resources/shaders/scene.slang | 6 +- Lorr/Engine/Scene/SceneRenderer.cc | 106 ++++++++++------ Lorr/Engine/Scene/SceneRenderer.hh | 2 - 8 files changed, 170 insertions(+), 86 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 94cb3598..da8568e6 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -1526,7 +1526,7 @@ auto AssetManager::delete_asset(const UUID &uuid) -> void { } } - LOG_TRACE("Deleted asset {}.", uuid.str()); + // LOG_TRACE("Deleted asset {}.", uuid.str()); } auto AssetManager::get_asset(const UUID &uuid) -> Asset * { diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 7947871e..b655ce70 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -2,6 +2,12 @@ #include +// i hate this +PFN_vkCreateDescriptorPool vk_CreateDescriptorPool; +PFN_vkCreateDescriptorSetLayout vk_CreateDescriptorSetLayout; +PFN_vkAllocateDescriptorSets vk_AllocateDescriptorSets; +PFN_vkUpdateDescriptorSets vk_UpdateDescriptorSets; + namespace lr { constexpr fmtlog::LogLevel to_log_category(VkDebugUtilsMessageSeverityFlagBitsEXT severity) { switch (severity) { @@ -88,6 +94,7 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expected std::expected std::expected on shader yet. 
+ // WARN: this extension is only supported by // .add_pNext(&maintenance_8_features) - - // NOTE: LLVMPipe does not support this extension yet - //.add_pNext(&image_atomic_int64_features) .add_pNext(&vk10_features); auto device_result = device_builder.build(); if (!device_result) { @@ -191,6 +197,11 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected &&tar .on_begin_pass = on_begin_pass, .on_end_pass = on_end_pass, .user_data = &self, - } }); + } } + ); } auto Device::wait(this Device &self, LR_CALLSTACK) -> void { @@ -347,35 +359,81 @@ auto Device::wait(this Device &self, LR_CALLSTACK) -> void { self.runtime->wait_idle(); } -auto Device::create_persistent_descriptor_set(this Device &self, ls::span bindings, u32 index) - -> vuk::Unique { +auto Device::create_persistent_descriptor_set( + this Device &self, + u32 set_index, + ls::span bindings, + ls::span binding_flags +) -> vuk::PersistentDescriptorSet { ZoneScoped; - u32 descriptor_count = 0; - auto raw_bindings = std::vector(bindings.size()); - auto binding_flags = std::vector(bindings.size()); - for (const auto &[binding, raw_binding, raw_binding_flags] : std::views::zip(bindings, raw_bindings, binding_flags)) { - raw_binding.binding = binding.binding; - raw_binding.descriptorType = vuk::DescriptorBinding::vk_descriptor_type(binding.type); - raw_binding.descriptorCount = binding.descriptor_count; - raw_binding.stageFlags = VK_SHADER_STAGE_ALL; - raw_binding_flags = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; - descriptor_count += binding.descriptor_count; + LS_EXPECT(bindings.size() == binding_flags.size()); + + auto descriptor_sizes = std::vector(); + for (const auto &binding : bindings) { + LS_EXPECT(binding.descriptorType < VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT); + descriptor_sizes.emplace_back(binding.descriptorType, binding.descriptorCount); } - vuk::DescriptorSetLayoutCreateInfo layout_ci = { - .index = index, - .bindings = std::move(raw_bindings), - .flags = std::move(binding_flags), 
+ auto pool_flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT; + auto pool_info = VkDescriptorPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = pool_flags, + .maxSets = 1, + .poolSizeCount = static_cast(descriptor_sizes.size()), + .pPoolSizes = descriptor_sizes.data(), + }; + auto pool = VkDescriptorPool{}; + vk_CreateDescriptorPool(self.handle, &pool_info, nullptr, &pool); + + auto set_layout_binding_flags_info = VkDescriptorSetLayoutBindingFlagsCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, + .pNext = nullptr, + .bindingCount = static_cast(binding_flags.size()), + .pBindingFlags = binding_flags.data(), }; - return self.runtime->create_persistent_descriptorset(self.allocator.value(), layout_ci, descriptor_count); + auto set_layout_info = VkDescriptorSetLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = &set_layout_binding_flags_info, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + auto set_layout = VkDescriptorSetLayout{}; + vk_CreateDescriptorSetLayout(self.handle, &set_layout_info, nullptr, &set_layout); + + auto set_alloc_info = VkDescriptorSetAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = pool, + .descriptorSetCount = 1, + .pSetLayouts = &set_layout, + }; + auto descriptor_set = VkDescriptorSet{}; + vk_AllocateDescriptorSets(self.handle, &set_alloc_info, &descriptor_set); + + auto persistent_set_create_info = vuk::DescriptorSetLayoutCreateInfo{ + .dslci = set_layout_info, + .index = set_index, + .bindings = std::vector(bindings.begin(), bindings.end()), + .flags = std::vector(binding_flags.begin(), binding_flags.end()), + }; + return vuk::PersistentDescriptorSet{ + .backing_pool = pool, + .set_layout_create_info = persistent_set_create_info, + 
.set_layout = set_layout, + .backing_set = descriptor_set, + .wdss = {}, + .descriptor_bindings = {}, + }; } -auto Device::commit_descriptor_set(this Device &self, vuk::PersistentDescriptorSet &set) -> void { +auto Device::commit_descriptor_set(this Device &self, ls::span writes) -> void { ZoneScoped; - set.commit(self.runtime.value()); + vk_UpdateDescriptorSets(self.handle, writes.size(), writes.data(), 0, nullptr); } auto Device::create_swap_chain(this Device &self, VkSurfaceKHR surface, ls::option old_swap_chain) diff --git a/Lorr/Engine/Graphics/Vulkan/Pipeline.cc b/Lorr/Engine/Graphics/Vulkan/Pipeline.cc index e0557639..79b228c4 100644 --- a/Lorr/Engine/Graphics/Vulkan/Pipeline.cc +++ b/Lorr/Engine/Graphics/Vulkan/Pipeline.cc @@ -15,6 +15,10 @@ auto Pipeline::create( for (const auto &set : persistent_sets) { create_info.explicit_set_layouts.push_back(set.set_layout_create_info); + for (const auto &[binding, binding_flags] : std::views::zip(set.set_layout_create_info.bindings, set.set_layout_create_info.flags)) { + create_info + .set_binding_flags(set.set_layout_create_info.index, binding.binding, static_cast(binding_flags)); + } } auto slang_module = session.load_module({ .module_name = compile_info.module_name, .source = compile_info.shader_source }).value(); diff --git a/Lorr/Engine/Graphics/VulkanDevice.hh b/Lorr/Engine/Graphics/VulkanDevice.hh index 4a70d03b..daac9eb1 100644 --- a/Lorr/Engine/Graphics/VulkanDevice.hh +++ b/Lorr/Engine/Graphics/VulkanDevice.hh @@ -15,12 +15,6 @@ #include namespace lr { -struct BindlessDescriptorInfo { - u32 binding = 0; - vuk::DescriptorType type = {}; - u32 descriptor_count = 0; -}; - struct TransferManager { private: Device *device = nullptr; @@ -141,9 +135,13 @@ public: auto end_frame(this Device &, vuk::Value &&target_attachment) -> void; auto wait(this Device &, LR_THISCALL) -> void; - auto create_persistent_descriptor_set(this Device &, ls::span bindings, u32 index) - -> vuk::Unique; - auto commit_descriptor_set(this 
Device &, vuk::PersistentDescriptorSet &set) -> void; + auto create_persistent_descriptor_set( + this Device &, + u32 set_index, + ls::span bindings, + ls::span binding_flags + ) -> vuk::PersistentDescriptorSet; + auto commit_descriptor_set(this Device &, ls::span writes) -> void; auto create_swap_chain(this Device &, VkSurfaceKHR surface, ls::option old_swap_chain = ls::nullopt) -> std::expected; diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index b67f429e..d4970a31 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -54,6 +54,7 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { [[shader("fragment")]] func fs_main(VertexOutput input) -> u32 { +#if 1 let material = params.materials[input.material_index]; if (material.flags & MaterialFlag::HasAlbedoImage) { UVGradient grad; @@ -67,6 +68,7 @@ func fs_main(VertexOutput input) -> u32 { discard; } } +#endif std::atomic_add(params.overdraw[u32x2(input.position.xy)], 1u, std::memory_order_acq_rel, std::MemoryLocation::Image, MemoryScope::QueueFamily); diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 9a5e3abd..fdc7c5b2 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -138,7 +138,7 @@ public struct Material { public u32 albedo_image_index = ~0u; public u32 normal_image_index = ~0u; public u32 emissive_image_index = ~0u; - public u32 metallic_rougness_image_index = ~0u; + public u32 metallic_roughness_image_index = ~0u; public u32 occlusion_image_index = ~0u; public func sample_albedo_color(in UVGradient grad) -> f32x4 { @@ -173,8 +173,8 @@ public struct Material { public func sample_metallic_roughness(in UVGradient grad) -> f32x2 { let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); if (this.flags 
& MaterialFlag::HasMetallicRoughnessImage) { - let color = material_images[this.metallic_rougness_image_index] - .sample_grad(material_samplers[this.metallic_rougness_image_index], grad.uv, grad.ddx, grad.ddy).bg; + let color = material_images[this.metallic_roughness_image_index] + .sample_grad(material_samplers[this.metallic_roughness_image_index], grad.uv, grad.ddx, grad.ddy).bg; return metallic_roughness * color; } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 563fae5e..b77f78de 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -30,42 +30,24 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi auto shaders_root = asset_man.asset_root_path(AssetType::Shader); constexpr auto MATERIAL_COUNT = 1024_sz; - BindlessDescriptorInfo bindless_set_info[] = { - { .binding = 0, .type = vuk::DescriptorType::eSampler, .descriptor_count = MATERIAL_COUNT }, - { .binding = 1, .type = vuk::DescriptorType::eSampledImage, .descriptor_count = MATERIAL_COUNT }, + VkDescriptorSetLayoutBinding bindless_set_info[] = { + { .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .descriptorCount = MATERIAL_COUNT, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, + { .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = MATERIAL_COUNT, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, }; - self.materials_descriptor_set = self.device->create_persistent_descriptor_set(bindless_set_info, 1).release(); - auto invalid_image_info = ImageInfo{ - .format = vuk::Format::eR8G8B8A8Srgb, - .usage = vuk::ImageUsageFlagBits::eSampled, - .type = vuk::ImageType::e2D, - .extent = { .width = 1, .height = 1, .depth = 1 }, - .name = "Invalid", - }; - std::tie(self.invalid_image, self.invalid_image_view) = Image::create_with_view(*self.device, invalid_image_info).value(); - auto invalid_image = 
self.device->image_view(self.invalid_image_view.id()); - - auto full_white = 0xFFFFFFFF_u32; - transfer_man.wait_on(transfer_man.upload_staging(self.invalid_image_view, &full_white, sizeof(decltype(full_white)))); - - auto invalid_sampler_info = SamplerInfo{ - .min_filter = vuk::Filter::eLinear, - .mag_filter = vuk::Filter::eLinear, - .mipmap_mode = vuk::SamplerMipmapMode::eLinear, - .addr_u = vuk::SamplerAddressMode::eRepeat, - .addr_v = vuk::SamplerAddressMode::eRepeat, - .addr_w = vuk::SamplerAddressMode::eRepeat, - .compare_op = vuk::CompareOp::eNever, - }; - auto invalid_sampler = Sampler::create(*self.device, invalid_sampler_info).value(); - auto invalid_sampler_handle = self.device->sampler(invalid_sampler.id()); - for (auto i = 0_sz; i < MATERIAL_COUNT; i++) { - self.materials_descriptor_set.update_sampler(0, i, *invalid_sampler_handle); - self.materials_descriptor_set.update_sampled_image(1, i, *invalid_image, vuk::ImageLayout::eShaderReadOnlyOptimal); - } - self.device->commit_descriptor_set(self.materials_descriptor_set); - self.device->destroy(invalid_sampler.id()); + VkDescriptorBindingFlags bindless_set_binding_flags[] = { + VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT, + VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT, + }; + self.materials_descriptor_set = self.device->create_persistent_descriptor_set(1, bindless_set_info, bindless_set_binding_flags); // ── EDITOR ────────────────────────────────────────────────────────── auto default_slang_session = self.device->new_slang_session({ @@ -333,15 +315,57 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in } if (!info.dirty_texture_indices.empty()) { - for (const auto &[texture_pair, index] : std::views::zip(info.dirty_textures, info.dirty_texture_indices)) { - auto image_view = self.device->image_view(texture_pair.n0); - auto sampler = self.device->sampler(texture_pair.n1); - 
self.materials_descriptor_set.update_sampler(0, index, sampler.value()); - self.materials_descriptor_set.update_sampled_image(1, index, image_view.value(), vuk::ImageLayout::eShaderReadOnlyOptimal); + auto sampler_descriptor_infos = std::vector(); + auto image_descriptor_infos = std::vector(); + for (const auto &[image_view_id, sampler_id] : info.dirty_textures) { + auto image_view = self.device->image_view(image_view_id); + auto sampler = self.device->sampler(sampler_id); + + sampler_descriptor_infos.push_back( + { .sampler = sampler.value().payload, // + .imageView = nullptr, + .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED } + ); + image_descriptor_infos.push_back( + { .sampler = nullptr, // + .imageView = image_view.value().payload, + .imageLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL } + ); + } + + auto descriptor_writes = std::vector(); + for (const auto &[i, descriptor_index] : std::views::zip(std::views::iota(0_u32), info.dirty_texture_indices)) { + auto sampler_write = VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = self.materials_descriptor_set.backing_set, + .dstBinding = 0, + .dstArrayElement = descriptor_index, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .pImageInfo = &sampler_descriptor_infos[i], + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + descriptor_writes.push_back(sampler_write); + + auto image_write = VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = self.materials_descriptor_set.backing_set, + .dstBinding = 1, + .dstArrayElement = descriptor_index, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = &image_descriptor_infos[i], + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + descriptor_writes.push_back(image_write); } - self.device->commit_descriptor_set(self.materials_descriptor_set); - self.device->wait(); // I have no idea how to enable 
UPDATE_AFTER_BIND in vuk + self.device->commit_descriptor_set(descriptor_writes); + // self.device->wait(); // I have no idea how to enable UPDATE_AFTER_BIND in vuk } if (!info.dirty_material_indices.empty()) { diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 721b68bd..fc271d51 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -64,8 +64,6 @@ struct SceneRenderer { // Then what are they? // TODO: Per scene sky settings - Image invalid_image = {}; - ImageView invalid_image_view = {}; Image sky_transmittance_lut = {}; ImageView sky_transmittance_lut_view = {}; Image sky_multiscatter_lut = {}; From 219b28189b5786fc25c1d888b1b47336c1944767 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Wed, 6 Aug 2025 14:28:19 +0300 Subject: [PATCH 15/16] fix use after free on dirty materials --- Lorr/Engine/Asset/Asset.cc | 2 +- Lorr/Engine/Graphics/Vulkan/Device.cc | 118 +++++++++++++----- Lorr/Engine/Graphics/Vulkan/Pipeline.cc | 4 + Lorr/Engine/Graphics/VulkanDevice.hh | 16 ++- .../shaders/passes/visbuffer_encode.slang | 2 + Lorr/Engine/Resources/shaders/scene.slang | 6 +- Lorr/Engine/Scene/SceneRenderer.cc | 106 ++++++++++------ Lorr/Engine/Scene/SceneRenderer.hh | 2 - xmake/packages.lua | 2 +- 9 files changed, 171 insertions(+), 87 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 94cb3598..da8568e6 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -1526,7 +1526,7 @@ auto AssetManager::delete_asset(const UUID &uuid) -> void { } } - LOG_TRACE("Deleted asset {}.", uuid.str()); + // LOG_TRACE("Deleted asset {}.", uuid.str()); } auto AssetManager::get_asset(const UUID &uuid) -> Asset * { diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 7947871e..cf9aa631 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -2,6 
+2,12 @@ #include +// i hate this +PFN_vkCreateDescriptorPool vk_CreateDescriptorPool; +PFN_vkCreateDescriptorSetLayout vk_CreateDescriptorSetLayout; +PFN_vkAllocateDescriptorSets vk_AllocateDescriptorSets; +PFN_vkUpdateDescriptorSets vk_UpdateDescriptorSets; + namespace lr { constexpr fmtlog::LogLevel to_log_category(VkDebugUtilsMessageSeverityFlagBitsEXT severity) { switch (severity) { @@ -88,6 +94,7 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expected std::expected std::expected on shader yet. + // WARN: this extension is only supported by // .add_pNext(&maintenance_8_features) - - // NOTE: LLVMPipe does not support this extension yet - //.add_pNext(&image_atomic_int64_features) .add_pNext(&vk10_features); auto device_result = device_builder.build(); if (!device_result) { @@ -191,6 +197,11 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected &&tar .on_begin_pass = on_begin_pass, .on_end_pass = on_end_pass, .user_data = &self, - } }); + } } + ); } auto Device::wait(this Device &self, LR_CALLSTACK) -> void { @@ -347,35 +359,81 @@ auto Device::wait(this Device &self, LR_CALLSTACK) -> void { self.runtime->wait_idle(); } -auto Device::create_persistent_descriptor_set(this Device &self, ls::span bindings, u32 index) - -> vuk::Unique { +auto Device::create_persistent_descriptor_set( + this Device &self, + u32 set_index, + ls::span bindings, + ls::span binding_flags +) -> vuk::PersistentDescriptorSet { ZoneScoped; - u32 descriptor_count = 0; - auto raw_bindings = std::vector(bindings.size()); - auto binding_flags = std::vector(bindings.size()); - for (const auto &[binding, raw_binding, raw_binding_flags] : std::views::zip(bindings, raw_bindings, binding_flags)) { - raw_binding.binding = binding.binding; - raw_binding.descriptorType = vuk::DescriptorBinding::vk_descriptor_type(binding.type); - raw_binding.descriptorCount = binding.descriptor_count; - raw_binding.stageFlags = VK_SHADER_STAGE_ALL; - 
raw_binding_flags = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; - descriptor_count += binding.descriptor_count; + LS_EXPECT(bindings.size() == binding_flags.size()); + + auto descriptor_sizes = std::vector(); + for (const auto &binding : bindings) { + LS_EXPECT(binding.descriptorType < VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT); + descriptor_sizes.emplace_back(binding.descriptorType, binding.descriptorCount); } - vuk::DescriptorSetLayoutCreateInfo layout_ci = { - .index = index, - .bindings = std::move(raw_bindings), - .flags = std::move(binding_flags), + auto pool_flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT; + auto pool_info = VkDescriptorPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = static_cast(pool_flags), + .maxSets = 1, + .poolSizeCount = static_cast(descriptor_sizes.size()), + .pPoolSizes = descriptor_sizes.data(), + }; + auto pool = VkDescriptorPool{}; + vk_CreateDescriptorPool(self.handle, &pool_info, nullptr, &pool); + + auto set_layout_binding_flags_info = VkDescriptorSetLayoutBindingFlagsCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, + .pNext = nullptr, + .bindingCount = static_cast(binding_flags.size()), + .pBindingFlags = binding_flags.data(), }; - return self.runtime->create_persistent_descriptorset(self.allocator.value(), layout_ci, descriptor_count); + auto set_layout_info = VkDescriptorSetLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = &set_layout_binding_flags_info, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + auto set_layout = VkDescriptorSetLayout{}; + vk_CreateDescriptorSetLayout(self.handle, &set_layout_info, nullptr, &set_layout); + + auto set_alloc_info = VkDescriptorSetAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = 
pool, + .descriptorSetCount = 1, + .pSetLayouts = &set_layout, + }; + auto descriptor_set = VkDescriptorSet{}; + vk_AllocateDescriptorSets(self.handle, &set_alloc_info, &descriptor_set); + + auto persistent_set_create_info = vuk::DescriptorSetLayoutCreateInfo{ + .dslci = set_layout_info, + .index = set_index, + .bindings = std::vector(bindings.begin(), bindings.end()), + .flags = std::vector(binding_flags.begin(), binding_flags.end()), + }; + return vuk::PersistentDescriptorSet{ + .backing_pool = pool, + .set_layout_create_info = persistent_set_create_info, + .set_layout = set_layout, + .backing_set = descriptor_set, + .wdss = {}, + .descriptor_bindings = {}, + }; } -auto Device::commit_descriptor_set(this Device &self, vuk::PersistentDescriptorSet &set) -> void { +auto Device::commit_descriptor_set(this Device &self, ls::span writes) -> void { ZoneScoped; - set.commit(self.runtime.value()); + vk_UpdateDescriptorSets(self.handle, writes.size(), writes.data(), 0, nullptr); } auto Device::create_swap_chain(this Device &self, VkSurfaceKHR surface, ls::option old_swap_chain) diff --git a/Lorr/Engine/Graphics/Vulkan/Pipeline.cc b/Lorr/Engine/Graphics/Vulkan/Pipeline.cc index e0557639..79b228c4 100644 --- a/Lorr/Engine/Graphics/Vulkan/Pipeline.cc +++ b/Lorr/Engine/Graphics/Vulkan/Pipeline.cc @@ -15,6 +15,10 @@ auto Pipeline::create( for (const auto &set : persistent_sets) { create_info.explicit_set_layouts.push_back(set.set_layout_create_info); + for (const auto &[binding, binding_flags] : std::views::zip(set.set_layout_create_info.bindings, set.set_layout_create_info.flags)) { + create_info + .set_binding_flags(set.set_layout_create_info.index, binding.binding, static_cast(binding_flags)); + } } auto slang_module = session.load_module({ .module_name = compile_info.module_name, .source = compile_info.shader_source }).value(); diff --git a/Lorr/Engine/Graphics/VulkanDevice.hh b/Lorr/Engine/Graphics/VulkanDevice.hh index 4a70d03b..daac9eb1 100644 --- 
a/Lorr/Engine/Graphics/VulkanDevice.hh +++ b/Lorr/Engine/Graphics/VulkanDevice.hh @@ -15,12 +15,6 @@ #include namespace lr { -struct BindlessDescriptorInfo { - u32 binding = 0; - vuk::DescriptorType type = {}; - u32 descriptor_count = 0; -}; - struct TransferManager { private: Device *device = nullptr; @@ -141,9 +135,13 @@ public: auto end_frame(this Device &, vuk::Value &&target_attachment) -> void; auto wait(this Device &, LR_THISCALL) -> void; - auto create_persistent_descriptor_set(this Device &, ls::span bindings, u32 index) - -> vuk::Unique; - auto commit_descriptor_set(this Device &, vuk::PersistentDescriptorSet &set) -> void; + auto create_persistent_descriptor_set( + this Device &, + u32 set_index, + ls::span bindings, + ls::span binding_flags + ) -> vuk::PersistentDescriptorSet; + auto commit_descriptor_set(this Device &, ls::span writes) -> void; auto create_swap_chain(this Device &, VkSurfaceKHR surface, ls::option old_swap_chain = ls::nullopt) -> std::expected; diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index b67f429e..d4970a31 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -54,6 +54,7 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { [[shader("fragment")]] func fs_main(VertexOutput input) -> u32 { +#if 1 let material = params.materials[input.material_index]; if (material.flags & MaterialFlag::HasAlbedoImage) { UVGradient grad; @@ -67,6 +68,7 @@ func fs_main(VertexOutput input) -> u32 { discard; } } +#endif std::atomic_add(params.overdraw[u32x2(input.position.xy)], 1u, std::memory_order_acq_rel, std::MemoryLocation::Image, MemoryScope::QueueFamily); diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 9a5e3abd..fdc7c5b2 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ 
b/Lorr/Engine/Resources/shaders/scene.slang @@ -138,7 +138,7 @@ public struct Material { public u32 albedo_image_index = ~0u; public u32 normal_image_index = ~0u; public u32 emissive_image_index = ~0u; - public u32 metallic_rougness_image_index = ~0u; + public u32 metallic_roughness_image_index = ~0u; public u32 occlusion_image_index = ~0u; public func sample_albedo_color(in UVGradient grad) -> f32x4 { @@ -173,8 +173,8 @@ public struct Material { public func sample_metallic_roughness(in UVGradient grad) -> f32x2 { let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); if (this.flags & MaterialFlag::HasMetallicRoughnessImage) { - let color = material_images[this.metallic_rougness_image_index] - .sample_grad(material_samplers[this.metallic_rougness_image_index], grad.uv, grad.ddx, grad.ddy).bg; + let color = material_images[this.metallic_roughness_image_index] + .sample_grad(material_samplers[this.metallic_roughness_image_index], grad.uv, grad.ddx, grad.ddy).bg; return metallic_roughness * color; } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 563fae5e..777db0ee 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -30,42 +30,24 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi auto shaders_root = asset_man.asset_root_path(AssetType::Shader); constexpr auto MATERIAL_COUNT = 1024_sz; - BindlessDescriptorInfo bindless_set_info[] = { - { .binding = 0, .type = vuk::DescriptorType::eSampler, .descriptor_count = MATERIAL_COUNT }, - { .binding = 1, .type = vuk::DescriptorType::eSampledImage, .descriptor_count = MATERIAL_COUNT }, + VkDescriptorSetLayoutBinding bindless_set_info[] = { + { .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .descriptorCount = MATERIAL_COUNT, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, + { .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + 
.descriptorCount = MATERIAL_COUNT, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, }; - self.materials_descriptor_set = self.device->create_persistent_descriptor_set(bindless_set_info, 1).release(); - auto invalid_image_info = ImageInfo{ - .format = vuk::Format::eR8G8B8A8Srgb, - .usage = vuk::ImageUsageFlagBits::eSampled, - .type = vuk::ImageType::e2D, - .extent = { .width = 1, .height = 1, .depth = 1 }, - .name = "Invalid", - }; - std::tie(self.invalid_image, self.invalid_image_view) = Image::create_with_view(*self.device, invalid_image_info).value(); - auto invalid_image = self.device->image_view(self.invalid_image_view.id()); - - auto full_white = 0xFFFFFFFF_u32; - transfer_man.wait_on(transfer_man.upload_staging(self.invalid_image_view, &full_white, sizeof(decltype(full_white)))); - - auto invalid_sampler_info = SamplerInfo{ - .min_filter = vuk::Filter::eLinear, - .mag_filter = vuk::Filter::eLinear, - .mipmap_mode = vuk::SamplerMipmapMode::eLinear, - .addr_u = vuk::SamplerAddressMode::eRepeat, - .addr_v = vuk::SamplerAddressMode::eRepeat, - .addr_w = vuk::SamplerAddressMode::eRepeat, - .compare_op = vuk::CompareOp::eNever, - }; - auto invalid_sampler = Sampler::create(*self.device, invalid_sampler_info).value(); - auto invalid_sampler_handle = self.device->sampler(invalid_sampler.id()); - for (auto i = 0_sz; i < MATERIAL_COUNT; i++) { - self.materials_descriptor_set.update_sampler(0, i, *invalid_sampler_handle); - self.materials_descriptor_set.update_sampled_image(1, i, *invalid_image, vuk::ImageLayout::eShaderReadOnlyOptimal); - } - self.device->commit_descriptor_set(self.materials_descriptor_set); - self.device->destroy(invalid_sampler.id()); + VkDescriptorBindingFlags bindless_set_binding_flags[] = { + VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT, + VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | 
VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT, + }; + self.materials_descriptor_set = self.device->create_persistent_descriptor_set(1, bindless_set_info, bindless_set_binding_flags); // ── EDITOR ────────────────────────────────────────────────────────── auto default_slang_session = self.device->new_slang_session({ @@ -333,15 +315,57 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in } if (!info.dirty_texture_indices.empty()) { - for (const auto &[texture_pair, index] : std::views::zip(info.dirty_textures, info.dirty_texture_indices)) { - auto image_view = self.device->image_view(texture_pair.n0); - auto sampler = self.device->sampler(texture_pair.n1); - self.materials_descriptor_set.update_sampler(0, index, sampler.value()); - self.materials_descriptor_set.update_sampled_image(1, index, image_view.value(), vuk::ImageLayout::eShaderReadOnlyOptimal); + auto sampler_descriptor_infos = std::vector(); + auto image_descriptor_infos = std::vector(); + for (const auto &[image_view_id, sampler_id] : info.dirty_textures) { + auto image_view = self.device->image_view(image_view_id); + auto sampler = self.device->sampler(sampler_id); + + sampler_descriptor_infos.push_back( + { .sampler = sampler.value().payload, // + .imageView = nullptr, + .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED } + ); + image_descriptor_infos.push_back( + { .sampler = nullptr, // + .imageView = image_view.value().payload, + .imageLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL } + ); + } + + auto descriptor_writes = std::vector(); + for (const auto &[i, descriptor_index] : std::views::zip(std::views::iota(0_u32), info.dirty_texture_indices)) { + auto sampler_write = VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = self.materials_descriptor_set.backing_set, + .dstBinding = 0, + .dstArrayElement = descriptor_index, + .descriptorCount = 1, + .descriptorType = 
VK_DESCRIPTOR_TYPE_SAMPLER, + .pImageInfo = &sampler_descriptor_infos[i], + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + descriptor_writes.push_back(sampler_write); + + auto image_write = VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = self.materials_descriptor_set.backing_set, + .dstBinding = 1, + .dstArrayElement = descriptor_index, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = &image_descriptor_infos[i], + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + descriptor_writes.push_back(image_write); } - self.device->commit_descriptor_set(self.materials_descriptor_set); - self.device->wait(); // I have no idea how to enable UPDATE_AFTER_BIND in vuk + self.device->commit_descriptor_set(descriptor_writes); + self.device->wait(); // TODO: figure out the invalid descriptor situation } if (!info.dirty_material_indices.empty()) { diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 721b68bd..fc271d51 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -64,8 +64,6 @@ struct SceneRenderer { // Then what are they? 
// TODO: Per scene sky settings - Image invalid_image = {}; - ImageView invalid_image_view = {}; Image sky_transmittance_lut = {}; ImageView sky_transmittance_lut_view = {}; Image sky_multiscatter_lut = {}; diff --git a/xmake/packages.lua b/xmake/packages.lua index cc0f3e12..5eb6bee4 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -60,6 +60,6 @@ add_requires("vuk 2025.07.09", { configs = { }, debug = is_mode("debug") }) add_requires("meshoptimizer v0.24", { debug = true }) -add_requires("ktx v4.4.0") +add_requires("ktx v4.4.0", { debug = true }) add_requires("svector v1.0.3") From 30917907608e7a078e92fc21eaabc89ed4905495 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 10 Aug 2025 14:17:03 +0300 Subject: [PATCH 16/16] lod selecting on the gpu --- Lorr/Engine/Asset/Asset.cc | 90 ++++---- Lorr/Engine/Asset/Asset.hh | 3 - Lorr/Engine/Graphics/Vulkan/Device.cc | 59 ++++- Lorr/Engine/Graphics/Vulkan/Image.cc | 75 ++++++- Lorr/Engine/Graphics/VulkanDevice.hh | 12 + Lorr/Engine/Resources/shaders/cull.slang | 29 ++- .../shaders/passes/cull_meshes.slang | 67 ------ .../shaders/passes/cull_meshlets.slang | 14 +- .../passes/generate_cull_commands.slang | 17 ++ .../shaders/passes/select_lods.slang | 82 +++++++ Lorr/Engine/Resources/shaders/scene.slang | 48 ++-- Lorr/Engine/Scene/ECSModule/CoreComponents.hh | 2 +- Lorr/Engine/Scene/GPUScene.hh | 20 +- Lorr/Engine/Scene/Scene.cc | 81 +++---- Lorr/Engine/Scene/Scene.hh | 2 + Lorr/Engine/Scene/SceneRenderer.cc | 209 +++++++----------- Lorr/Engine/Scene/SceneRenderer.hh | 13 +- Lorr/ls/span.hh | 26 +-- xmake/packages.lua | 10 +- 19 files changed, 479 insertions(+), 380 deletions(-) delete mode 100644 Lorr/Engine/Resources/shaders/passes/cull_meshes.slang create mode 100644 Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang create mode 100644 Lorr/Engine/Resources/shaders/passes/select_lods.slang diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc 
index da8568e6..562b69fc 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -123,7 +123,6 @@ struct Handle::Impl { std::shared_mutex textures_mutex = {}; SlotMap textures = {}; - std::vector dirty_textures = {}; std::shared_mutex materials_mutex = {}; SlotMap materials = {}; @@ -845,7 +844,7 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { nullptr, lod_index_count, TARGET_ERROR, - 0, + meshopt_SimplifyLockBorder, &result_error ); @@ -891,8 +890,10 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { indirect_vertex_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); local_triangle_indices.resize(last_meshlet.triangle_offset + ((last_meshlet.triangle_count * 3 + 3) & ~3_u32)); + auto mesh_bb_min = glm::vec3(std::numeric_limits::max()); + auto mesh_bb_max = glm::vec3(std::numeric_limits::lowest()); auto meshlet_bounds = std::vector(meshlet_count); - for (const auto &[raw_meshlet, meshlet, meshlet_aabb] : std::views::zip(raw_meshlets, meshlets, meshlet_bounds)) { + for (const auto &[raw_meshlet, meshlet, bounds] : std::views::zip(raw_meshlets, meshlets, meshlet_bounds)) { // AABB computation auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); auto meshlet_bb_max = glm::vec3(std::numeric_limits::lowest()); @@ -903,18 +904,33 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); } + // Sphere and Cone computation + auto sphere_bounds = meshopt_computeMeshletBounds( + &indirect_vertex_indices[raw_meshlet.vertex_offset], + &local_triangle_indices[raw_meshlet.triangle_offset], + raw_meshlet.triangle_count, + reinterpret_cast(mesh_vertices.data()), + vertex_count, + sizeof(glm::vec3) + ); + meshlet.indirect_vertex_index_offset = raw_meshlet.vertex_offset; meshlet.local_triangle_index_offset = raw_meshlet.triangle_offset; meshlet.vertex_count = raw_meshlet.vertex_count; meshlet.triangle_count = raw_meshlet.triangle_count; - 
meshlet_aabb.aabb_min = meshlet_bb_min; - meshlet_aabb.aabb_max = meshlet_bb_max; + bounds.aabb_center = (meshlet_bb_max + meshlet_bb_min) * 0.5f; + bounds.aabb_extent = meshlet_bb_max - meshlet_bb_min; + bounds.sphere_center = glm::make_vec3(sphere_bounds.center); + bounds.sphere_radius = sphere_bounds.radius; - gpu_mesh.bounds.aabb_max = glm::max(gpu_mesh.bounds.aabb_max, meshlet_bb_max); - gpu_mesh.bounds.aabb_min = glm::min(gpu_mesh.bounds.aabb_min, meshlet_bb_min); + mesh_bb_min = glm::min(mesh_bb_min, meshlet_bb_min); + mesh_bb_max = glm::max(mesh_bb_max, meshlet_bb_max); } + gpu_mesh.bounds.aabb_center = (mesh_bb_max + mesh_bb_min) * 0.5f; + gpu_mesh.bounds.aabb_extent = mesh_bb_max - mesh_bb_min; + auto lod_upload_size = 0 // + ls::size_bytes(simplified_indices) // + ls::size_bytes(meshlets) // @@ -977,7 +993,6 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { mesh_upload_offset += ls::size_bytes(mesh_texcoords); } - // ignore spilling out buffer size by alignment auto gpu_mesh_buffer_handle = impl->device->buffer(gpu_mesh_buffer.id()); auto gpu_mesh_subrange = vuk::discard_buf("mesh", gpu_mesh_buffer_handle->subrange(0, mesh_upload_size)); gpu_mesh_subrange = transfer_man.upload_staging(std::move(cpu_mesh_buffer), std::move(gpu_mesh_subrange)); @@ -1093,6 +1108,22 @@ auto AssetManager::load_texture(const UUID &uuid, const TextureInfo &info) -> bo } } + auto sampler_info = SamplerInfo{ + .min_filter = vuk::Filter::eLinear, + .mag_filter = vuk::Filter::eLinear, + .mipmap_mode = vuk::SamplerMipmapMode::eLinear, + .addr_u = vuk::SamplerAddressMode::eRepeat, + .addr_v = vuk::SamplerAddressMode::eRepeat, + .addr_w = vuk::SamplerAddressMode::eRepeat, + .compare_op = vuk::CompareOp::eNever, + .max_anisotropy = 8.0f, + .mip_lod_bias = 0.0f, + .min_lod = 0.0f, + .max_lod = static_cast(mip_level_count - 1), + .use_anisotropy = true, + }; + auto sampler = Sampler::create(*impl->device, sampler_info).value(); + auto rel_path = fs::relative(asset_path, 
impl->root_path); auto image_info = ImageInfo{ .format = format, @@ -1178,24 +1209,7 @@ auto AssetManager::load_texture(const UUID &uuid, const TextureInfo &info) -> bo { auto write_lock = std::unique_lock(impl->textures_mutex); auto *asset = this->get_asset(uuid); - auto sampler_info = SamplerInfo{ - .min_filter = vuk::Filter::eLinear, - .mag_filter = vuk::Filter::eLinear, - .mipmap_mode = vuk::SamplerMipmapMode::eLinear, - .addr_u = vuk::SamplerAddressMode::eRepeat, - .addr_v = vuk::SamplerAddressMode::eRepeat, - .addr_w = vuk::SamplerAddressMode::eRepeat, - .compare_op = vuk::CompareOp::eNever, - .max_anisotropy = 8.0f, - .mip_lod_bias = 0.0f, - .min_lod = 0.0f, - .max_lod = static_cast(mip_level_count - 1), - .use_anisotropy = true, - }; - auto sampler = Sampler::create(*impl->device, sampler_info).value(); asset->texture_id = impl->textures.create_slot(Texture{ .image = image, .image_view = image_view, .sampler = sampler }); - write_lock.unlock(); - this->set_texture_dirty(asset->texture_id); } LOG_TRACE("Loaded texture {}.", uuid.str()); @@ -1645,32 +1659,6 @@ auto AssetManager::get_scene(SceneID scene_id) -> Scene * { return impl->scenes.slot(scene_id)->get(); } -auto AssetManager::set_texture_dirty(TextureID texture_id) -> void { - ZoneScoped; - - auto read_lock = std::shared_lock(impl->textures_mutex); - if (std::ranges::find(impl->dirty_textures, texture_id) != impl->dirty_textures.end()) { - return; - } - - read_lock.unlock(); - auto write_lock = std::unique_lock(impl->textures_mutex); - impl->dirty_textures.emplace_back(texture_id); -} - -auto AssetManager::get_dirty_texture_ids() -> std::vector { - ZoneScoped; - - auto read_lock = std::shared_lock(impl->textures_mutex); - auto dirty_textures = std::vector(impl->dirty_textures); - - read_lock.unlock(); - auto write_lock = std::unique_lock(impl->textures_mutex); - impl->dirty_textures.clear(); - - return dirty_textures; -} - auto AssetManager::set_material_dirty(MaterialID material_id) -> void { 
ZoneScoped; diff --git a/Lorr/Engine/Asset/Asset.hh b/Lorr/Engine/Asset/Asset.hh index cbb1690f..9f5abd1c 100755 --- a/Lorr/Engine/Asset/Asset.hh +++ b/Lorr/Engine/Asset/Asset.hh @@ -118,9 +118,6 @@ struct AssetManager : Handle { auto get_scene(const UUID &uuid) -> Scene *; auto get_scene(SceneID scene_id) -> Scene *; - auto set_texture_dirty(TextureID texture_id) -> void; - auto get_dirty_texture_ids() -> std::vector; - auto set_material_dirty(MaterialID material_id) -> void; auto get_dirty_material_ids() -> std::vector; }; diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index cf9aa631..fe4af8ed 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -249,6 +249,63 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expected { + constexpr auto MAX_DESCRIPTORS = 1024_sz; + VkDescriptorSetLayoutBinding bindless_set_info[] = { + // Samplers + { .binding = DescriptorTable_SamplerIndex, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .descriptorCount = MAX_DESCRIPTORS, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, + // Sampled Images + { .binding = DescriptorTable_SampledImageIndex, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = MAX_DESCRIPTORS, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, + // Storage Images + { .binding = DescriptorTable_StorageImageIndex, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = MAX_DESCRIPTORS, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, + }; + + constexpr static auto bindless_flags = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; + VkDescriptorBindingFlags bindless_set_binding_flags[] = { + bindless_flags, + bindless_flags, + bindless_flags, + }; + self.resources.descriptor_set = self.create_persistent_descriptor_set(1, bindless_set_info, 
bindless_set_binding_flags); + + auto invalid_image_info = ImageInfo{ + .format = vuk::Format::eR8G8B8A8Srgb, + .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .type = vuk::ImageType::e2D, + .extent = vuk::Extent3D(1_u32, 1_u32, 1_u32), + .name = "Invalid Placeholder Image", + }; + auto [invalid_image, invalid_image_view] = Image::create_with_view(self, invalid_image_info).value(); + + auto invalid_image_data = 0xFFFFFFFF_u32; + auto fut = self.transfer_manager.upload_staging(invalid_image_view, &invalid_image_data, sizeof(u32)); + fut = fut.as_released(vuk::Access::eFragmentSampled, vuk::DomainFlagBits::eGraphicsQueue); + self.transfer_manager.wait_on(std::move(fut)); + + auto invalid_sampler_info = SamplerInfo{}; + std::ignore = Sampler::create(self, invalid_sampler_info).value(); + return {}; } @@ -355,7 +412,7 @@ auto Device::end_frame(this Device &self, vuk::Value &&tar auto Device::wait(this Device &self, LR_CALLSTACK) -> void { ZoneScopedN("Device Wait Idle"); - LOG_TRACE("Device wait idle triggered at {}:{}!", LOC.file_name(), LOC.line()); + LOG_TRACE("Device wait idle triggered at {}!", LOC); self.runtime->wait_idle(); } diff --git a/Lorr/Engine/Graphics/Vulkan/Image.cc b/Lorr/Engine/Graphics/Vulkan/Image.cc index 669bd856..3ce83f56 100644 --- a/Lorr/Engine/Graphics/Vulkan/Image.cc +++ b/Lorr/Engine/Graphics/Vulkan/Image.cc @@ -4,6 +4,8 @@ #include "Engine/Memory/Stack.hh" +#include + namespace lr { auto Image::create(Device &device, const ImageInfo &info, LR_CALLSTACK) -> std::expected { ZoneScoped; @@ -132,6 +134,22 @@ auto ImageView::create(Device &device, Image &image, const ImageViewInfo &info, return std::unexpected(result.error()); } + auto image_descriptors = ankerl::svector(); + if (info.image_usage & vuk::ImageUsageFlagBits::eSampled) { + image_descriptors.push_back( + { .sampler = nullptr, // + .imageView = image_view_handle.payload, + .imageLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL } + ); + } + if 
(info.image_usage & vuk::ImageUsageFlagBits::eStorage) { + image_descriptors.push_back( + { .sampler = nullptr, // + .imageView = image_view_handle.payload, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL } + ); + } + auto image_view = ImageView{}; image_view.format_ = image.format(); image_view.extent_ = image.extent(); @@ -141,6 +159,38 @@ auto ImageView::create(Device &device, Image &image, const ImageViewInfo &info, image_view.id_ = device.resources.image_views.create_slot(static_cast(image_view_handle)); device.set_name(image_view, info.name); + auto &bindless_set = device.get_descriptor_set(); + auto descriptor_writes = ankerl::svector(); + if (info.image_usage & vuk::ImageUsageFlagBits::eSampled) { + descriptor_writes.push_back( + { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // + .pNext = nullptr, + .dstSet = bindless_set.backing_set, + .dstBinding = DescriptorTable_SampledImageIndex, + .dstArrayElement = image_view.index(), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = &image_descriptors[0], + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr } + ); + } + if (info.image_usage & vuk::ImageUsageFlagBits::eStorage) { + descriptor_writes.push_back( + { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // + .pNext = nullptr, + .dstSet = bindless_set.backing_set, + .dstBinding = DescriptorTable_StorageImageIndex, + .dstArrayElement = image_view.index(), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &image_descriptors[1], + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr } + ); + } + device.commit_descriptor_set({ descriptor_writes.begin(), descriptor_writes.size() }); + return image_view; } @@ -243,9 +293,28 @@ auto Sampler::create(Device &device, const SamplerInfo &info, [[maybe_unused]] v }; auto sampler = Sampler{}; - sampler.id_ = device.resources.samplers.create_slot(); - auto *sampler_handle = device.resources.samplers.slot(sampler.id_); - *sampler_handle 
= device.runtime->acquire_sampler(create_info, device.frame_count()); + auto sampler_handle = device.runtime->acquire_sampler(create_info, device.frame_count()); + auto sampler_descriptor = VkDescriptorImageInfo{ + .sampler = sampler_handle.payload, + .imageView = nullptr, + .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; + sampler.id_ = device.resources.samplers.create_slot(static_cast(sampler_handle)); + + auto &bindless_set = device.get_descriptor_set(); + auto descriptor_write = VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // + .pNext = nullptr, + .dstSet = bindless_set.backing_set, + .dstBinding = DescriptorTable_SamplerIndex, + .dstArrayElement = sampler.index(), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .pImageInfo = &sampler_descriptor, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + device.commit_descriptor_set(descriptor_write); return sampler; } diff --git a/Lorr/Engine/Graphics/VulkanDevice.hh b/Lorr/Engine/Graphics/VulkanDevice.hh index daac9eb1..7adbf492 100644 --- a/Lorr/Engine/Graphics/VulkanDevice.hh +++ b/Lorr/Engine/Graphics/VulkanDevice.hh @@ -89,12 +89,19 @@ protected: auto release(this TransferManager &) -> void; }; +enum : u32 { + DescriptorTable_SamplerIndex = 0, + DescriptorTable_SampledImageIndex, + DescriptorTable_StorageImageIndex, +}; + struct DeviceResources { SlotMap buffers = {}; SlotMap images = {}; SlotMap image_views = {}; SlotMap samplers = {}; SlotMap pipelines = {}; + vuk::PersistentDescriptorSet descriptor_set = {}; }; struct Device { @@ -126,6 +133,8 @@ private: public: auto init(this Device &, usize frame_count) -> std::expected; + auto init_resources(this Device &) -> std::expected; + auto destroy(this Device &) -> void; auto new_slang_session(this Device &, const SlangSessionInfo &info) -> ls::option; @@ -184,6 +193,9 @@ public: auto get_pass_queries() -> auto & { return pass_queries; } + auto get_descriptor_set() -> auto & { + return 
resources.descriptor_set; + } auto non_coherent_atom_size() -> u32 { return device_limits.nonCoherentAtomSize; diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index 7bfd823c..b84547a7 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -4,22 +4,19 @@ import std; import gpu; import debug_drawer; -public func normalize_plane(f32x4 p) -> f32x4 { - return p / length(p.xyz); -} - public struct ScreenAabb { public f32x3 min; public f32x3 max; } -// https://zeux.io/2023/01/12/approximate-projected-bounds -public func project_aabb(f32x4x4 mvp, f32 near, f32x3 aabb_min, f32x3 aabb_extent) -> Optional { +// Credits: +// - https://zeux.io/2023/01/12/approximate-projected-bounds +public func project_aabb(f32x4x4 mvp, f32 near, f32x3 aabb_center, f32x3 aabb_extent) -> Optional { let SX = mul(mvp, f32x4(aabb_extent.x, 0.0, 0.0, 0.0)); let SY = mul(mvp, f32x4(0.0, aabb_extent.y, 0.0, 0.0)); let SZ = mul(mvp, f32x4(0.0, 0.0, aabb_extent.z, 0.0)); - let P0 = mul(mvp, f32x4(aabb_min, 1.0)); + let P0 = mul(mvp, f32x4(aabb_center - aabb_extent * 0.5, 1.0)); let P1 = P0 + SZ; let P2 = P0 + SY; let P3 = P2 + SZ; @@ -51,14 +48,28 @@ public func project_aabb(f32x4x4 mvp, f32 near, f32x3 aabb_min, f32x3 aabb_exten return ret; } +func normalize_plane(f32x4 p) -> f32x4 { + return p / length(p.xyz); +} +// Credits: +// - https://github.com/SparkyPotato/radiance/blob/eaf18b3bbf4942234fa929ef6ad5e04e3c45fc62/shaders/passes/mesh/cull.slang#L340 +// - https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/ +// - https://fgiesen.wordpress.com/2010/10/17/view-frustum-culling/ public func test_frustum(in f32x4x4 mvp, in f32x3 aabb_center, in f32x3 aabb_extent) -> bool { f32x4 planes[] = { + // Left normalize_plane(mvp[3] + mvp[0]), + // Right normalize_plane(mvp[3] - mvp[0]), + // Bottom normalize_plane(mvp[3] + mvp[1]), + // Top normalize_plane(mvp[3] - mvp[1]), - 
normalize_plane(mvp[2]) + // Near + normalize_plane(mvp[2]), + // Far + normalize_plane(mvp[3] - mvp[2]) }; let aabb_half_extent = aabb_extent * 0.5; @@ -100,4 +111,4 @@ public func test_occlusion( var uv = (min_uv + max_uv) * 0.5; let d = hiz_image.sample_mip(hiz_sampler, uv, mip); return screen_aabb.max.z <= d; -} \ No newline at end of file +} diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang deleted file mode 100644 index f5892339..00000000 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshes.slang +++ /dev/null @@ -1,67 +0,0 @@ -import std; -import gpu; -import scene; -import cull; -import debug_drawer; - -struct ShaderParameters { - ConstantBuffer camera; - StructuredBuffer mesh_instances; - StructuredBuffer meshes; - StructuredBuffer transforms; - - RWStructuredBuffer cull_meshlets_cmd; - RWStructuredBuffer visible_mesh_instances_indices; - RWStructuredBuffer debug_drawer; -}; - -#ifndef CULLING_MESHES_COUNT - #define CULLING_MESHES_COUNT 64 -#endif - -#ifndef CULLING_MESHLET_COUNT - #define CULLING_MESHLET_COUNT 64 -#endif - -[[shader("compute")]] -[[numthreads(CULLING_MESHES_COUNT, 1, 1)]] -func cs_main( - uint3 thread_id : SV_DispatchThreadID, - uniform ParameterBlock params, - uniform u32 mesh_instances_count, - uniform CullFlags cull_flags -) -> void { - let mesh_instance_index = thread_id.x; - if (mesh_instance_index >= mesh_instances_count) { - return; - } - - let mesh_instance = params.mesh_instances[mesh_instance_index]; - let mesh = params.meshes[mesh_instance.mesh_index]; - let mesh_lod = mesh.lods[mesh_instance.lod_index]; - let transform = params.transforms[mesh_instance.transform_index]; - - let aabb_min = mesh.bounds.aabb_min; - let aabb_max = mesh.bounds.aabb_max; - let aabb_extent = aabb_max - aabb_min; - let aabb_center = (aabb_min + aabb_max) * 0.5; - - var visible = true; - if (visible && (cull_flags & CullFlags::MeshletFrustum)) { - let cur_mvp = 
mul(params.camera.projection_view_mat, transform.world); - visible = test_frustum(cur_mvp, aabb_center, aabb_extent); - - var debug_aabb = DebugAABB(); - debug_aabb.position = mul(transform.world, f32x4(aabb_center, 1.0)).xyz; - debug_aabb.size = mul(transform.world, f32x4(aabb_extent, 0.0)).xyz; - debug_aabb.color = f32x3(0.0, 1.0, 0.0); - debug_aabb.coord = DebugDrawCoord::World; - debug_draw_aabb(params.debug_drawer[0], debug_aabb); - } - - if (visible) { - let workgroup_count = (mesh_lod.meshlet_count + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; - let index = std::atomic_add(params.cull_meshlets_cmd[0].x, workgroup_count, std::memory_order_relaxed); - // params.visible_mesh_instances_indices[index] = mesh_instance_index; - } -} diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index 92d2186b..a92cfdfb 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -14,6 +14,7 @@ struct ShaderParameters { StructuredBuffer transforms; Image2D hiz_image; Sampler hiz_sampler; + StructuredBuffer meshlet_instances_count; RWStructuredBuffer cull_triangles_cmd; RWStructuredBuffer visible_meshlet_instances_indices; @@ -29,9 +30,9 @@ struct ShaderParameters { func cs_main( uint3 thread_id : SV_DispatchThreadID, uniform ParameterBlock params, - uniform u32 meshlet_instance_count, uniform CullFlags cull_flags ) -> void { + let meshlet_instance_count = params.meshlet_instances_count[0]; let meshlet_instance_index = thread_id.x; if (meshlet_instance_index >= meshlet_instance_count) { return; @@ -44,22 +45,17 @@ func cs_main( let mesh_lod = mesh.lods[mesh_instance.lod_index]; let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; - let aabb_min = bounds.aabb_min; - let aabb_max = bounds.aabb_max; - let aabb_extent = aabb_max - aabb_min; - let aabb_center = (aabb_min + aabb_max) * 0.5; - var visible = 
true; if (visible && (cull_flags & CullFlags::MeshletFrustum)) { let cur_mvp = mul(params.camera.projection_view_mat, transform.world); - visible = test_frustum(cur_mvp, aabb_center, aabb_extent); + visible = test_frustum(cur_mvp, bounds.aabb_center, bounds.aabb_extent); } if (visible && (cull_flags & CullFlags::Occlusion)) { let prev_mvp = mul(params.camera.frustum_projection_view_mat, transform.world); - if (let screen_aabb = project_aabb(prev_mvp, params.camera.near_clip, aabb_min, aabb_extent)) { + if (let screen_aabb = project_aabb(prev_mvp, params.camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { visible = !test_occlusion(screen_aabb, params.hiz_image, params.hiz_sampler); - if (visible) { + if (visible && true) { let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; var debug_rect = DebugRect(); diff --git a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang new file mode 100644 index 00000000..3495f0fe --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang @@ -0,0 +1,17 @@ +import std; +import gpu; + +struct ShaderParameters { + StructuredBuffer meshlet_instances_count; + + RWStructuredBuffer cull_meshlets_cmd; +}; + +[[shader("compute")]] +[[numthreads(1, 1, 1)]] +func cs_main( + uniform ParameterBlock params +) -> void { + params.cull_meshlets_cmd[0].x = (params.meshlet_instances_count[0] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; +} + diff --git a/Lorr/Engine/Resources/shaders/passes/select_lods.slang b/Lorr/Engine/Resources/shaders/passes/select_lods.slang new file mode 100644 index 00000000..f5d9cce4 --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/select_lods.slang @@ -0,0 +1,82 @@ +import std; +import gpu; +import scene; +import cull; +import debug_drawer; + +struct ShaderParameters { + ConstantBuffer camera; + StructuredBuffer meshes; + StructuredBuffer 
transforms; + + RWStructuredBuffer mesh_instances; + RWStructuredBuffer meshlet_instances; + RWStructuredBuffer meshlet_instances_count; + RWStructuredBuffer debug_drawer; +}; + +#ifndef CULLING_MESHES_COUNT + #define CULLING_MESHES_COUNT 64 +#endif + +[[shader("compute")]] +[[numthreads(CULLING_MESHES_COUNT, 1, 1)]] +func cs_main( + uint3 thread_id : SV_DispatchThreadID, + uniform ParameterBlock params, + uniform u32 mesh_instances_count, + uniform CullFlags cull_flags +) -> void { + let mesh_instance_index = thread_id.x; + if (mesh_instance_index >= mesh_instances_count) { + return; + } + + let mesh_instance = ¶ms.mesh_instances[mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let transform = params.transforms[mesh_instance.transform_index]; + let mvp = mul(params.camera.projection_view_mat, transform.world); + if (!test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { + return; + } + + var lod_index = 0; + if (true) { + // Credits: + // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 + let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; + let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; + let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; + let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; + let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); + let aabb_rough_camera_distance = max(length(aabb_center - params.camera.position) - 0.5 * aabb_rough_extent, 0.0); + + // Avoiding the atan here + let rough_resolution = max(params.camera.resolution.x, params.camera.resolution.y); + let fov90_distance_to_screen_ratio = 2.0f; + let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; + let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); + let rough_aabb_pixel_size = aabb_size_at_1m / 
pixel_size_at_1m; + + for (var i = 1; i < mesh.lod_count; i++) { + let mesh_lod = mesh.lods[i]; + let rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; + if (rough_pixel_error < params.camera.acceptable_lod_error) { + lod_index = i; + } else { + break; + } + } + } + + mesh_instance.lod_index = lod_index; + let mesh_lod = mesh.lods[lod_index]; + let meshlet_instance_offset = std::atomic_add(params.meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { + let offset = meshlet_instance_offset + i; + var meshlet_instance = MeshletInstance(); + meshlet_instance.mesh_instance_index = mesh_instance_index; + meshlet_instance.meshlet_index = i; + params.meshlet_instances[offset] = meshlet_instance; + } +} diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index fdc7c5b2..90bdc0a9 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -72,6 +72,7 @@ public struct Camera { public f32 near_clip; public f32 far_clip; public f32x2 resolution; + public f32 acceptable_lod_error; }; public struct Transform { @@ -107,9 +108,11 @@ public struct UVGradient { }; [[vk::binding(0, 1)]] -Sampler material_samplers[]; +Sampler bindless_samplers[]; [[vk::binding(1, 1)]] -Image2D material_images[]; +Image2D bindless_images[]; +[[vk::binding(2, 1)]] +StorageImage2D bindless_storage_images[]; public enum MaterialFlag : u32 { None = 0, @@ -135,16 +138,17 @@ public struct Material { public f32 metallic_factor = 0.0; public f32 alpha_cutoff = 0.0; public MaterialFlag flags = MaterialFlag::None; - public u32 albedo_image_index = ~0u; - public u32 normal_image_index = ~0u; - public u32 emissive_image_index = ~0u; - public u32 metallic_roughness_image_index = ~0u; - public u32 occlusion_image_index = ~0u; + public u32 sampler_index = 0; + public u32 albedo_image_index = 0; + public u32 normal_image_index = 0; + public 
u32 emissive_image_index = 0; + public u32 metallic_roughness_image_index = 0; + public u32 occlusion_image_index = 0; public func sample_albedo_color(in UVGradient grad) -> f32x4 { if (this.flags & MaterialFlag::HasAlbedoImage) { - let color = material_images[this.albedo_image_index] - .sample_grad(material_samplers[this.albedo_image_index], grad.uv, grad.ddx, grad.ddy); + let color = bindless_images[this.albedo_image_index] + .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy); return this.albedo_color * color; } @@ -152,18 +156,14 @@ public struct Material { } public func sample_normal_color(in UVGradient grad) -> f32x3 { - if (this.flags & MaterialFlag::HasNormalImage) { - return material_images[this.normal_image_index] - .sample_grad(material_samplers[this.normal_image_index], grad.uv, grad.ddx, grad.ddy).rgb; - } - - return { 0.0 }; + return bindless_images[this.normal_image_index] + .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; } public func sample_emissive_color(in UVGradient grad) -> f32x3 { if (this.flags & MaterialFlag::HasEmissiveImage) { - let color = material_images[this.emissive_image_index] - .sample_grad(material_samplers[this.emissive_image_index], grad.uv, grad.ddx, grad.ddy).rgb; + let color = bindless_images[this.emissive_image_index] + .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; return this.emissive_color * color; } @@ -173,8 +173,8 @@ public struct Material { public func sample_metallic_roughness(in UVGradient grad) -> f32x2 { let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); if (this.flags & MaterialFlag::HasMetallicRoughnessImage) { - let color = material_images[this.metallic_roughness_image_index] - .sample_grad(material_samplers[this.metallic_roughness_image_index], grad.uv, grad.ddx, grad.ddy).bg; + let color = bindless_images[this.metallic_roughness_image_index] + 
.sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).bg; return metallic_roughness * color; } @@ -183,8 +183,8 @@ public struct Material { public func sample_occlusion_color(in UVGradient grad) -> f32 { if (this.flags & MaterialFlag::HasOcclusionImage) { - return material_images[this.occlusion_image_index] - .sample_grad(material_samplers[this.occlusion_image_index], grad.uv, grad.ddx, grad.ddy).r; + return bindless_images[this.occlusion_image_index] + .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).r; } return 1.0; @@ -257,8 +257,10 @@ public struct Meshlet { }; public struct Bounds { - public f32x3 aabb_min = {}; - public f32x3 aabb_max = {}; + public f32x3 aabb_center = {}; + public f32x3 aabb_extent = {}; + public f32x3 sphere_center = {}; + public f32 sphere_radius = 0.0f; }; public struct MeshletInstance { diff --git a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh index ce922c80..105532e3 100644 --- a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh +++ b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh @@ -28,8 +28,8 @@ ECS_COMPONENT_BEGIN(Camera) ECS_COMPONENT_MEMBER(far_clip, f32, 1000.0f) ECS_COMPONENT_MEMBER(axis_velocity, glm::vec3, { 0.0, 0.0, 0.0 }) ECS_COMPONENT_MEMBER(velocity_mul, f32, 1.0) - ECS_COMPONENT_MEMBER(freeze_frustum, bool, false) ECS_COMPONENT_MEMBER(frustum_projection_view_mat, glm::mat4, glm::mat4(1.0)) + ECS_COMPONENT_MEMBER(acceptable_lod_error, f32, 2.0f) ECS_COMPONENT_END(); ECS_COMPONENT_TAG(PerspectiveCamera); diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 04d21e17..3a7806bc 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -109,6 +109,7 @@ struct Camera { alignas(4) f32 near_clip = {}; alignas(4) f32 far_clip = {}; alignas(4) glm::vec2 resolution = {}; + alignas(4) f32 acceptable_lod_error = 0.0f; }; enum class TransformID : u64 { Invalid = ~0_u64 }; @@ -143,16 
+144,19 @@ struct Material { alignas(4) f32 metallic_factor = 0.0f; alignas(4) f32 alpha_cutoff = 0.0f; alignas(4) MaterialFlag flags = MaterialFlag::None; - alignas(4) u32 albedo_image_index = ~0_u32; - alignas(4) u32 normal_image_index = ~0_u32; - alignas(4) u32 emissive_image_index = ~0_u32; - alignas(4) u32 metallic_roughness_image_index = ~0_u32; - alignas(4) u32 occlusion_image_index = ~0_u32; + alignas(4) u32 sampler_index = 0; + alignas(4) u32 albedo_image_index = 0; + alignas(4) u32 normal_image_index = 0; + alignas(4) u32 emissive_image_index = 0; + alignas(4) u32 metallic_roughness_image_index = 0; + alignas(4) u32 occlusion_image_index = 0; }; struct Bounds { - alignas(4) glm::vec3 aabb_min = {}; - alignas(4) glm::vec3 aabb_max = {}; + alignas(4) glm::vec3 aabb_center = {}; + alignas(4) glm::vec3 aabb_extent = {}; + alignas(4) glm::vec3 sphere_center = {}; + alignas(4) f32 sphere_radius = 0.0f; }; struct MeshletInstance { @@ -191,7 +195,7 @@ struct MeshLOD { }; struct Mesh { - constexpr static auto MAX_LODS = 1_sz; + constexpr static auto MAX_LODS = 8_sz; alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index c7b1cb8c..0d64b6db 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -153,6 +153,8 @@ auto Scene::destroy(this Scene &self) -> void { } } + self.mesh_instance_count = 0; + self.max_meshlet_instance_count = 0; self.root.destruct(); self.name.clear(); self.root.clear(); @@ -550,13 +552,9 @@ auto Scene::render(this Scene &self, SceneRenderer &renderer, SceneRenderInfo &i camera_data.near_clip = c.near_clip; camera_data.far_clip = c.far_clip; camera_data.resolution = glm::vec2(static_cast(info.extent.width), static_cast(info.extent.height)); - - if (!c.freeze_frustum) { - camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; - c.frustum_projection_view_mat = camera_data.projection_view_mat; - } else { - 
camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; - } + camera_data.acceptable_lod_error = c.acceptable_lod_error; + camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; + c.frustum_projection_view_mat = camera_data.projection_view_mat; }); ls::option sun_data = ls::nullopt; @@ -712,9 +710,9 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared auto &app = Application::get(); + auto max_meshlet_instance_count = 0_u32; auto gpu_meshes = std::vector(); auto gpu_mesh_instances = std::vector(); - auto gpu_meshlet_instances = std::vector(); if (self.models_dirty) { for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { @@ -729,44 +727,30 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared // ── INSTANCING ────────────────────────────────────────────────── for (const auto transform_id : transform_ids) { - auto mesh_instance_index = static_cast(gpu_mesh_instances.size()); - - auto lod_index = 0; - const auto &lod = gpu_mesh.lods[lod_index]; + auto lod0_index = 0; + const auto &lod0 = gpu_mesh.lods[lod0_index]; auto &mesh_instance = gpu_mesh_instances.emplace_back(); mesh_instance.mesh_index = mesh_index; - mesh_instance.lod_index = lod_index; + mesh_instance.lod_index = lod0_index; mesh_instance.material_index = SlotMap_decode_id(primitive.material_id).index; mesh_instance.transform_index = SlotMap_decode_id(transform_id).index; - - for (u32 meshlet_index = 0; meshlet_index < lod.meshlet_count; meshlet_index++) { - auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); - meshlet_instance.mesh_instance_index = mesh_instance_index; - meshlet_instance.meshlet_index = meshlet_index; - } + max_meshlet_instance_count += lod0.meshlet_count; } } } - } - auto dirty_texture_ids = app.asset_man.get_dirty_texture_ids(); - auto dirty_texture_indices = std::vector(dirty_texture_ids.size()); - auto dirty_textures = 
std::vector>(dirty_texture_ids.size()); - for (const auto &[texture_pair, index, id] : std::views::zip(dirty_textures, dirty_texture_indices, dirty_texture_ids)) { - auto *texture = app.asset_man.get_texture(id); - texture_pair = ls::pair(texture->image_view.id(), texture->sampler.id()); - index = SlotMap_decode_id(id).index; + self.mesh_instance_count = gpu_mesh_instances.size(); + self.max_meshlet_instance_count = max_meshlet_instance_count; } - auto uuid_to_index = [&](const UUID &uuid) -> ls::option { + auto uuid_to_image_index = [&](const UUID &uuid) -> ls::option { if (!app.asset_man.is_texture_loaded(uuid)) { return ls::nullopt; } - auto *texture_asset = app.asset_man.get_asset(uuid); - - return SlotMap_decode_id(texture_asset->texture_id).index; + auto *texture = app.asset_man.get_texture(uuid); + return texture->image_view.index(); }; auto dirty_material_ids = app.asset_man.get_dirty_material_ids(); @@ -774,14 +758,20 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared auto dirty_material_indices = std::vector(dirty_material_ids.size()); for (const auto &[gpu_material, index, id] : std::views::zip(gpu_materials, dirty_material_indices, dirty_material_ids)) { const auto *material = app.asset_man.get_material(id); - auto albedo_image_index = uuid_to_index(material->albedo_texture); - auto normal_image_index = uuid_to_index(material->normal_texture); - auto emissive_image_index = uuid_to_index(material->emissive_texture); - auto metallic_roughness_image_index = uuid_to_index(material->metallic_roughness_texture); - auto occlusion_image_index = uuid_to_index(material->occlusion_texture); + auto albedo_image_index = uuid_to_image_index(material->albedo_texture); + auto normal_image_index = uuid_to_image_index(material->normal_texture); + auto emissive_image_index = uuid_to_image_index(material->emissive_texture); + auto metallic_roughness_image_index = uuid_to_image_index(material->metallic_roughness_texture); + auto 
occlusion_image_index = uuid_to_image_index(material->occlusion_texture); + auto sampler_index = 0_u32; auto flags = GPU::MaterialFlag::None; - flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; + if (albedo_image_index.has_value()) { + auto *texture = app.asset_man.get_texture(material->albedo_texture); + sampler_index = texture->sampler.index(); + flags |= GPU::MaterialFlag::HasAlbedoImage; + } + flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; @@ -793,26 +783,25 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> Prepared gpu_material.metallic_factor = material->metallic_factor; gpu_material.alpha_cutoff = material->alpha_cutoff; gpu_material.flags = flags; - gpu_material.albedo_image_index = albedo_image_index.value_or(~0_u32); - gpu_material.normal_image_index = normal_image_index.value_or(~0_u32); - gpu_material.emissive_image_index = emissive_image_index.value_or(~0_u32); - gpu_material.metallic_roughness_image_index = metallic_roughness_image_index.value_or(~0_u32); - gpu_material.occlusion_image_index = occlusion_image_index.value_or(~0_u32); + gpu_material.sampler_index = sampler_index; + gpu_material.albedo_image_index = albedo_image_index.value_or(0_u32); + gpu_material.normal_image_index = normal_image_index.value_or(0_u32); + gpu_material.emissive_image_index = emissive_image_index.value_or(0_u32); + gpu_material.metallic_roughness_image_index = metallic_roughness_image_index.value_or(0_u32); + gpu_material.occlusion_image_index = occlusion_image_index.value_or(0_u32); index = SlotMap_decode_id(id).index; } auto prepare_info = FramePrepareInfo{ - .mesh_instance_count = 
static_cast(self.rendering_meshes_map.size()), - .dirty_texture_indices = dirty_texture_indices, - .dirty_textures = dirty_textures, + .mesh_instance_count = self.mesh_instance_count, + .max_meshlet_instance_count = self.max_meshlet_instance_count, .dirty_transform_ids = self.dirty_transforms, .gpu_transforms = self.transforms.slots_unsafe(), .dirty_material_indices = dirty_material_indices, .gpu_materials = gpu_materials, .gpu_meshes = gpu_meshes, .gpu_mesh_instances = gpu_mesh_instances, - .gpu_meshlet_instances = gpu_meshlet_instances, }; auto prepared_frame = renderer.prepare_frame(prepare_info); diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index 76b69eb3..124ab661 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -51,6 +51,8 @@ private: std::vector dirty_transforms = {}; bool models_dirty = false; + u32 mesh_instance_count = 0; + u32 max_meshlet_instance_count = 0; GPU::CullFlags cull_flags = GPU::CullFlags::All; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 777db0ee..5f81ab3a 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -27,28 +27,9 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi auto &app = Application::get(); auto &asset_man = app.asset_man; auto &transfer_man = app.device.transfer_man(); + auto &bindless_descriptor_set = app.device.get_descriptor_set(); auto shaders_root = asset_man.asset_root_path(AssetType::Shader); - constexpr auto MATERIAL_COUNT = 1024_sz; - VkDescriptorSetLayoutBinding bindless_set_info[] = { - { .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, - .descriptorCount = MATERIAL_COUNT, - .stageFlags = VK_SHADER_STAGE_ALL, - .pImmutableSamplers = nullptr }, - { .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = MATERIAL_COUNT, - .stageFlags = VK_SHADER_STAGE_ALL, - .pImmutableSamplers = nullptr }, - }; - - 
VkDescriptorBindingFlags bindless_set_binding_flags[] = { - VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT, - VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT, - }; - self.materials_descriptor_set = self.device->create_persistent_descriptor_set(1, bindless_set_info, bindless_set_binding_flags); - // ── EDITOR ────────────────────────────────────────────────────────── auto default_slang_session = self.device->new_slang_session({ .definitions = { @@ -123,11 +104,17 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi Pipeline::create(*self.device, default_slang_session, sky_final_pipeline_info).value(); // ── VISBUFFER ─────────────────────────────────────────────────────── - auto vis_cull_meshes_pipeline_info = PipelineCompileInfo{ - .module_name = "passes.cull_meshes", + auto generate_cull_commands_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.generate_cull_commands", + .entry_points = { "cs_main" }, + }; + Pipeline::create(*self.device, default_slang_session, generate_cull_commands_pipeline_info).value(); + + auto vis_select_lods_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.select_lods", .entry_points = { "cs_main" }, }; - Pipeline::create(*self.device, default_slang_session, vis_cull_meshes_pipeline_info).value(); + Pipeline::create(*self.device, default_slang_session, vis_select_lods_pipeline_info).value(); auto vis_cull_meshlets_pipeline_info = PipelineCompileInfo{ .module_name = "passes.cull_meshlets", @@ -145,7 +132,7 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi .module_name = "passes.visbuffer_encode", .entry_points = { "vs_main", "fs_main" }, }; - Pipeline::create(*self.device, default_slang_session, vis_encode_pipeline_info, self.materials_descriptor_set).value(); + 
Pipeline::create(*self.device, default_slang_session, vis_encode_pipeline_info, bindless_descriptor_set).value(); auto vis_clear_pipeline_info = PipelineCompileInfo{ .module_name = "passes.visbuffer_clear", @@ -157,7 +144,7 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi .module_name = "passes.visbuffer_decode", .entry_points = { "vs_main", "fs_main" }, }; - Pipeline::create(*self.device, default_slang_session, vis_decode_pipeline_info, self.materials_descriptor_set).value(); + Pipeline::create(*self.device, default_slang_session, vis_decode_pipeline_info, bindless_descriptor_set).value(); // ── PBR ───────────────────────────────────────────────────────────── auto pbr_basic_pipeline_info = PipelineCompileInfo{ @@ -314,60 +301,6 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in prepared_frame.transforms_buffer = self.transforms_buffer.acquire(*self.device, "transforms", vuk::Access::eMemoryRead); } - if (!info.dirty_texture_indices.empty()) { - auto sampler_descriptor_infos = std::vector(); - auto image_descriptor_infos = std::vector(); - for (const auto &[image_view_id, sampler_id] : info.dirty_textures) { - auto image_view = self.device->image_view(image_view_id); - auto sampler = self.device->sampler(sampler_id); - - sampler_descriptor_infos.push_back( - { .sampler = sampler.value().payload, // - .imageView = nullptr, - .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED } - ); - image_descriptor_infos.push_back( - { .sampler = nullptr, // - .imageView = image_view.value().payload, - .imageLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL } - ); - } - - auto descriptor_writes = std::vector(); - for (const auto &[i, descriptor_index] : std::views::zip(std::views::iota(0_u32), info.dirty_texture_indices)) { - auto sampler_write = VkWriteDescriptorSet{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = self.materials_descriptor_set.backing_set, - .dstBinding = 0, - .dstArrayElement 
= descriptor_index, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, - .pImageInfo = &sampler_descriptor_infos[i], - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - descriptor_writes.push_back(sampler_write); - - auto image_write = VkWriteDescriptorSet{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = self.materials_descriptor_set.backing_set, - .dstBinding = 1, - .dstArrayElement = descriptor_index, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = &image_descriptor_infos[i], - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - descriptor_writes.push_back(image_write); - } - - self.device->commit_descriptor_set(descriptor_writes); - self.device->wait(); // TODO: figure out the invalid descriptor situation - } - if (!info.dirty_material_indices.empty()) { auto rebuild_materials = !self.materials_buffer || self.materials_buffer.data_size() <= info.gpu_materials.size_bytes(); self.materials_buffer = self.materials_buffer.resize(*self.device, info.gpu_materials.size_bytes()).value(); @@ -423,21 +356,23 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in if (!info.gpu_mesh_instances.empty()) { self.mesh_instances_buffer = self.mesh_instances_buffer.resize(*self.device, info.gpu_mesh_instances.size_bytes()).value(); prepared_frame.mesh_instances_buffer = transfer_man.upload_staging(info.gpu_mesh_instances, self.mesh_instances_buffer); - - self.mesh_instance_count = info.gpu_mesh_instances.size(); } else if (self.mesh_instances_buffer) { prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(*self.device, "mesh instances", vuk::eMemoryRead); } - if (!info.gpu_meshlet_instances.empty()) { - self.meshlet_instances_buffer = self.meshlet_instances_buffer.resize(*self.device, info.gpu_meshlet_instances.size_bytes()).value(); - prepared_frame.meshlet_instances_buffer = 
transfer_man.upload_staging(info.gpu_meshlet_instances, self.meshlet_instances_buffer); - - self.meshlet_instance_count = info.gpu_meshlet_instances.size(); - } else if (self.meshlet_instances_buffer) { - prepared_frame.meshlet_instances_buffer = self.meshlet_instances_buffer.acquire(*self.device, "meshlet instances", vuk::eMemoryRead); + if (info.max_meshlet_instance_count > 0) { + prepared_frame.meshlet_instances_buffer = + transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, info.max_meshlet_instance_count * sizeof(GPU::MeshletInstance)); + prepared_frame.visible_meshlet_instances_indices_buffer = + transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, info.max_meshlet_instance_count * sizeof(u32)); + prepared_frame.reordered_indices_buffer = transfer_man.alloc_transient_buffer( + vuk::MemoryUsage::eGPUonly, + info.max_meshlet_instance_count * Model::MAX_MESHLET_PRIMITIVES * 3 * sizeof(u32) + ); } + prepared_frame.mesh_instance_count = info.mesh_instance_count; + return prepared_frame; } @@ -445,6 +380,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep ZoneScoped; auto &transfer_man = self.device->transfer_man(); + auto &bindless_descriptor_set = self.device->get_descriptor_set(); // ────────────────────────────────────────────────────────────────────── auto final_attachment = vuk::declare_ia( @@ -587,7 +523,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep camera_buffer = transfer_man.scratch_buffer(info.camera.value()); } - if (self.mesh_instance_count) { + if (frame.mesh_instance_count) { auto transforms_buffer = std::move(frame.transforms_buffer); auto meshes_buffer = std::move(frame.meshes_buffer); auto mesh_instances_buffer = std::move(frame.mesh_instances_buffer); @@ -595,61 +531,78 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep auto materials_buffer = std::move(frame.materials_buffer); // ── CULL MESHES 
───────────────────────────────────────────────────── - auto vis_cull_meshes_pass = vuk::make_pass( - "vis cull meshes", - [mesh_instance_count = self.mesh_instance_count, cull_flags = info.cull_flags]( + auto vis_select_lods_pass = vuk::make_pass( + "vis select lods", + [mesh_instance_count = frame.mesh_instance_count, cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eComputeRead) camera, - VUK_BA(vuk::eComputeRead) mesh_instances, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, - VUK_BA(vuk::eComputeRW) cull_meshlets_cmd, - VUK_BA(vuk::eComputeWrite) visible_mesh_instances_indices, + VUK_BA(vuk::eComputeRW) mesh_instances, + VUK_BA(vuk::eComputeRW) meshlet_instances, + VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, VUK_BA(vuk::eComputeRW) debug_drawer ) { cmd_list // - .bind_compute_pipeline("passes.cull_meshes") + .bind_compute_pipeline("passes.select_lods") .bind_buffer(0, 0, camera) - .bind_buffer(0, 1, mesh_instances) - .bind_buffer(0, 2, meshes) - .bind_buffer(0, 3, transforms) - .bind_buffer(0, 4, cull_meshlets_cmd) - .bind_buffer(0, 5, visible_mesh_instances_indices) + .bind_buffer(0, 1, meshes) + .bind_buffer(0, 2, transforms) + .bind_buffer(0, 3, mesh_instances) + .bind_buffer(0, 4, meshlet_instances) + .bind_buffer(0, 5, visible_meshlet_instances_count) .bind_buffer(0, 6, debug_drawer) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) .dispatch_invocations(mesh_instance_count); - return std::make_tuple(camera, mesh_instances, meshes, transforms, cull_meshlets_cmd, visible_mesh_instances_indices, debug_drawer); + return std::make_tuple(camera, meshes, transforms, mesh_instances, meshlet_instances, visible_meshlet_instances_count, debug_drawer); } ); - auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); - auto visible_mesh_instances_indices_buffer = - 
transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, self.mesh_instance_count * sizeof(u32)); + auto visible_meshlet_instances_count_buffer = transfer_man.scratch_buffer({ 0 }); std::tie( camera_buffer, - mesh_instances_buffer, meshes_buffer, transforms_buffer, - cull_meshlets_cmd_buffer, - visible_mesh_instances_indices_buffer, + mesh_instances_buffer, + meshlet_instances_buffer, + visible_meshlet_instances_count_buffer, debug_drawer_buffer ) = - vis_cull_meshes_pass( + vis_select_lods_pass( std::move(camera_buffer), - std::move(mesh_instances_buffer), std::move(meshes_buffer), std::move(transforms_buffer), - std::move(cull_meshlets_cmd_buffer), - std::move(visible_mesh_instances_indices_buffer), + std::move(mesh_instances_buffer), + std::move(meshlet_instances_buffer), + std::move(visible_meshlet_instances_count_buffer), std::move(debug_drawer_buffer) ); + auto generate_cull_commands_pass = vuk::make_pass( + "generate cull commands", + [](vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) cull_meshlets_cmd) { + cmd_list // + .bind_compute_pipeline("passes.generate_cull_commands") + .bind_buffer(0, 0, visible_meshlet_instances_count) + .bind_buffer(0, 1, cull_meshlets_cmd) + .dispatch(1); + + return std::make_tuple(visible_meshlet_instances_count, cull_meshlets_cmd); + } + ); + + auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); + std::tie(visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = + generate_cull_commands_pass(std::move(visible_meshlet_instances_count_buffer), std::move(cull_meshlets_cmd_buffer)); + // ── CULL MESHLETS ─────────────────────────────────────────────────── auto vis_cull_meshlets_pass = vuk::make_pass( "vis cull meshlets", - [meshlet_instance_count = self.meshlet_instance_count, cull_flags = info.cull_flags]( + [cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) 
dispatch_cmd, VUK_BA(vuk::eComputeRead) camera, @@ -658,6 +611,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, VUK_IA(vuk::eComputeRead) hiz, + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, VUK_BA(vuk::eComputeRW) cull_triangles_cmd, VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRW) debug_drawer @@ -671,10 +625,11 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep .bind_buffer(0, 4, transforms) .bind_image(0, 5, hiz) .bind_sampler(0, 6, hiz_sampler_info) - .bind_buffer(0, 7, cull_triangles_cmd) - .bind_buffer(0, 8, visible_meshlet_instances_indices) - .bind_buffer(0, 9, debug_drawer) - .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(meshlet_instance_count, cull_flags)) + .bind_buffer(0, 7, visible_meshlet_instances_count) + .bind_buffer(0, 8, cull_triangles_cmd) + .bind_buffer(0, 9, visible_meshlet_instances_indices) + .bind_buffer(0, 10, debug_drawer) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) .dispatch_indirect(dispatch_cmd); return std::make_tuple( @@ -692,8 +647,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep ); auto cull_triangles_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); - auto visible_meshlet_instances_indices_buffer = - transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, self.meshlet_instance_count * sizeof(u32)); + auto visible_meshlet_instances_indices_buffer = std::move(frame.visible_meshlet_instances_indices_buffer); std::tie( camera_buffer, @@ -714,6 +668,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep std::move(meshes_buffer), std::move(transforms_buffer), std::move(hiz_attachment), + std::move(visible_meshlet_instances_count_buffer), std::move(cull_triangles_cmd_buffer), 
std::move(visible_meshlet_instances_indices_buffer), std::move(debug_drawer_buffer) @@ -761,10 +716,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep ); auto draw_command_buffer = transfer_man.scratch_buffer({ .instanceCount = 1 }); - auto reordered_indices_buffer = transfer_man.alloc_transient_buffer( - vuk::MemoryUsage::eGPUonly, - self.meshlet_instance_count * Model::MAX_MESHLET_PRIMITIVES * 3 * sizeof(u32) - ); + auto reordered_indices_buffer = std::move(frame.reordered_indices_buffer); std::tie( camera_buffer, @@ -832,7 +784,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep // ── VISBUFFER ENCODE ──────────────────────────────────────────────── auto vis_encode_pass = vuk::make_pass( "vis encode", - [descriptor_set = &self.materials_descriptor_set]( + [descriptor_set = &bindless_descriptor_set]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) triangle_indirect, VUK_BA(vuk::eIndexRead) index_buffer, @@ -972,7 +924,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, Prep // ── VISBUFFER DECODE ──────────────────────────────────────────────── auto vis_decode_pass = vuk::make_pass( "vis decode", - [descriptor_set = &self.materials_descriptor_set]( // + [descriptor_set = &bindless_descriptor_set]( // vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eFragmentRead) camera, VUK_BA(vuk::eFragmentRead) meshlet_instances, @@ -1524,18 +1476,11 @@ auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { self.device->wait(); - self.mesh_instance_count = 0; - if (self.transforms_buffer) { self.device->destroy(self.transforms_buffer.id()); self.transforms_buffer = {}; } - if (self.meshlet_instances_buffer) { - self.device->destroy(self.meshlet_instances_buffer.id()); - self.meshlet_instances_buffer = {}; - } - if (self.mesh_instances_buffer) { self.device->destroy(self.mesh_instances_buffer.id()); self.mesh_instances_buffer = {}; diff --git 
a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index fc271d51..229cf86d 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -7,10 +7,7 @@ namespace lr { struct FramePrepareInfo { u32 mesh_instance_count = 0; - u32 meshlet_instance_count = 0; - - ls::span dirty_texture_indices = {}; - ls::span> dirty_textures = {}; + u32 max_meshlet_instance_count = 0; ls::span dirty_transform_ids = {}; ls::span gpu_transforms = {}; @@ -20,14 +17,16 @@ struct FramePrepareInfo { ls::span gpu_meshes = {}; ls::span gpu_mesh_instances = {}; - ls::span gpu_meshlet_instances = {}; }; struct PreparedFrame { + u32 mesh_instance_count = 0; vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = {}; vuk::Value mesh_instances_buffer = {}; vuk::Value meshlet_instances_buffer = {}; + vuk::Value visible_meshlet_instances_indices_buffer = {}; + vuk::Value reordered_indices_buffer = {}; vuk::Value materials_buffer = {}; }; @@ -53,13 +52,9 @@ struct SceneRenderer { Buffer exposure_buffer = {}; Buffer transforms_buffer = {}; - u32 mesh_instance_count = 0; - u32 meshlet_instance_count = 0; Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; - Buffer meshlet_instances_buffer = {}; - vuk::PersistentDescriptorSet materials_descriptor_set = {}; Buffer materials_buffer = {}; // Then what are they? 
diff --git a/Lorr/ls/span.hh b/Lorr/ls/span.hh index 98a8adc7..1647633a 100755 --- a/Lorr/ls/span.hh +++ b/Lorr/ls/span.hh @@ -23,35 +23,35 @@ struct span : public std::span { constexpr span() = default; template - constexpr span(const span &other): std::span(other.data(), other.size()) {}; + constexpr span(const span &other) : std::span(other.data(), other.size()){}; - constexpr span(this_type::reference v): std::span({ &v, 1 }) {}; + constexpr span(this_type::reference v) : std::span({ &v, 1 }) {}; - constexpr explicit(EXTENT != std::dynamic_extent) span(T *v, this_type::size_type size): std::span(v, size) {}; + constexpr explicit(EXTENT != std::dynamic_extent) span(T *v, this_type::size_type size) : std::span(v, size){}; - constexpr explicit(EXTENT != std::dynamic_extent) span(this_type::iterator v, this_type::size_type size): std::span(v, size) {}; + constexpr explicit(EXTENT != std::dynamic_extent) span(this_type::iterator v, this_type::size_type size) : std::span(v, size){}; - constexpr explicit(EXTENT != std::dynamic_extent) span(this_type::iterator begin_it, this_type::iterator end_it): - std::span(begin_it, end_it) {}; + constexpr explicit(EXTENT != std::dynamic_extent) span(this_type::iterator begin_it, this_type::iterator end_it) : + std::span(begin_it, end_it){}; template - constexpr span(T (&arr)[N]): std::span(arr) {}; + constexpr span(T (&arr)[N]) : std::span(arr){}; template - constexpr span(std::array &arr): std::span(arr) {}; + constexpr span(std::array &arr) : std::span(arr){}; template - constexpr span(const std::array &arr): std::span(arr) {}; + constexpr span(const std::array &arr) : std::span(arr){}; - constexpr span(std::vector &v): std::span(v.begin(), v.end()) {}; + constexpr span(std::vector &v) : std::span(v.begin(), v.end()) {}; - constexpr span(const std::vector &v): std::span(v.begin(), v.end()) {}; + constexpr span(const std::vector &v) : std::span(v.begin(), v.end()) {}; template - constexpr span(static_vector &arr): 
std::span(arr.begin(), arr.end()) {}; + constexpr span(static_vector &arr) : std::span(arr.begin(), arr.end()){}; template - constexpr span(const static_vector &arr): std::span(arr.begin(), arr.end()) {}; + constexpr span(const static_vector &arr) : std::span(arr.begin(), arr.end()){}; }; template diff --git a/xmake/packages.lua b/xmake/packages.lua index 5eb6bee4..f08b456b 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -36,9 +36,9 @@ add_requires("simdutf v6.2.0") add_requires("simdjson v3.12.2") add_requires("unordered_dense v4.5.0") add_requires("tracy v0.11.1", { configs = { - tracy_enable = false, - on_demand = true, - callstack = true, + tracy_enable = has_config("profile"), + on_demand = has_config("profile"), + callstack = has_config("profile"), callstack_inlines = false, code_transfer = true, exit = true, @@ -59,7 +59,7 @@ add_requires("vuk 2025.07.09", { configs = { disable_exceptions = true, }, debug = is_mode("debug") }) -add_requires("meshoptimizer v0.24", { debug = true }) -add_requires("ktx v4.4.0", { debug = true }) +add_requires("meshoptimizer v0.24") +add_requires("ktx v4.4.0") add_requires("svector v1.0.3")