diff --git a/README_hfspace.md b/README_hfspace.md new file mode 100644 index 0000000..2b2ea44 --- /dev/null +++ b/README_hfspace.md @@ -0,0 +1,74 @@ +--- +title: GuideFlow3D +emoji: ๐Ÿค— +colorFrom: yellow +colorTo: blue +app_file: demos/run_gradio_demo.py +sdk: gradio +sdk_version: 6.0.1 +pinned: false +license: apache-2.0 +python_version: "3.12" +short_description: A HF Space that demonstrates all use-cases for GuideFlow3D +--- + +Note: If you're looking to get a working version of the Hugging Face Space, then please use [this link](https://huggingface.co/spaces/suvadityamuk/GuideFlow3D) to fetch files with all assets in place as well. + +

+

GuideFlow3D: Optimization-Guided Rectified Flow For Appearance Transfer

+

+ Sayan Deb Sarkar 1 + . + Sinisa Stekovic 2 + . + Vincent Lepetit 2 + . + Iro Armeni1 +

+

Neural Information Processing Systems (NeurIPS) 2025

+

+ 1 Stanford University ยท 2 ENPC, IP Paris +

+

+ + [![arXiv](https://img.shields.io/badge/arXiv-blue?logo=arxiv&color=%23B31B1B)](https://arxiv.org/abs/2510.16136) + [![ProjectPage](https://img.shields.io/badge/Project_Page-GuideFlow3D-blue)](https://sayands.github.io/guideflow3d) + [![License](https://img.shields.io/badge/License-Apache--2.0-929292)](https://www.apache.org/licenses/LICENSE-2.0) +
+

+ +

+ + + +

+ +

+TL;DR: 3D appearance transfer pipeline robust to strong geometric variations between objects. +
+ +## ๐Ÿ“ƒ Abstract + +Transferring appearance to 3D assets using different representations of the appearance object - such as images or text - has garnered interest due to its wide range of applications in industries like gaming, augmented reality, and digital content creation. However, state-of-the-art methods still fail when the geometry between the input and appearance objects is significantly different. A straightforward approach is to directly apply a 3D generative model, but we show that this ultimately fails to produce appealing results. Instead, we propose a principled approach inspired by universal guidance. Given a pretrained rectified flow model conditioned on image or text, our training-free method interacts with the sampling process by periodically adding guidance. This guidance can be modeled as a differentiable loss function, and we experiment with two different types of guidance including part-aware losses for appearance and self-similarity. Our experiments show that our approach successfully transfers texture and geometric details to the input 3D asset, outperforming baselines both qualitatively and quantitatively. We also show that traditional metrics are not suitable for evaluating the task due to their inability of focusing on local details and comparing dissimilar inputs, in absence of ground truth data. We thus evaluate appearance transfer quality with a GPT-based system objectively ranking outputs, ensuring robust and human-like assessment, as further confirmed by our user study. Beyond showcased scenarios, our method is general and could be extended to different types of diffusion models and guidance functions. + +# :newspaper: News + +- [2025-09] GuideFlow3D is accepted to **NeurIPS 2025** ๐Ÿ”ฅ See you in San Diego! + +## ๐Ÿšง Code Release + +โณ Code and data will be released by the end of November! Stay tuned for updates. + +## ๐Ÿ“ง Contact +If you have any questions regarding this project, please use the github issue tracker or contact Sayan Deb Sarkar (sdsarkar@stanford.edu). + +# :page_facing_up: Citation + +```bibtex +@inproceedings{sayandsarkar_2025_guideflow3d, + author = {Deb Sarkar, Sayan and Stekovic, Sinisa and Lepetit, Vincent and Armeni, Iro}, + title = {GuideFlow3D: Optimization-Guided Rectified Flow For 3D Appearance Transfer}, + booktitle = {Advances in Neural Information Processing Systems (NeurIPS)}, + year = {2025}, +} +``` \ No newline at end of file diff --git a/demos/__init__.py b/demos/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/demos/build_wheels.sh b/demos/build_wheels.sh new file mode 100644 index 0000000..ea2777e --- /dev/null +++ b/demos/build_wheels.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# Create a directory to store wheels +mkdir -p ./wheels + +# Update system packages +apt-get update -y +apt-get install -y xvfb libx11-6 libgl1 libxrender1 + +# 1. Basic Dependencies +# We use 'pip wheel' to build/download wheels instead of installing +pip wheel --wheel-dir=./wheels \ + torch==2.5.0 torchvision==0.20.0 torchaudio==2.5.0 \ + --index-url https://download.pytorch.org/whl/cu124 + +pip wheel --wheel-dir=./wheels \ + pyvirtualdisplay \ + pillow imageio imageio-ffmpeg tqdm easydict opencv-python-headless \ + scipy ninja rembg onnxruntime trimesh open3d xatlas pyvista \ + pymeshfix igraph transformers tensorview psutil \ + lightning==2.2 h5py yacs scikit-image loguru boto3 \ + mesh2sdf tetgen==0.6.4 pymeshlab plyfile einops libigl \ + polyscope potpourri3d simple_parsing arrgh vtk numpy==1.26.4 + +# 2. 
Git Repositories +# pip wheel handles git urls perfectly +pip wheel --wheel-dir=./wheels \ + git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8 + +# 3. Extensions with Custom Build Steps (nvdiffrast, diffoctreerast, mip-splatting) +# These often require cloning first if they have submodules or complex setups + +# nvdiffrast +mkdir -p /tmp/extensions +if [ ! -d "/tmp/extensions/nvdiffrast" ]; then + git clone https://github.com/NVlabs/nvdiffrast.git /tmp/extensions/nvdiffrast +fi +pip wheel --wheel-dir=./wheels /tmp/extensions/nvdiffrast + +# diffoctreerast +if [ ! -d "/tmp/extensions/diffoctreerast" ]; then + git clone --recurse-submodules https://github.com/JeffreyXiang/diffoctreerast.git /tmp/extensions/diffoctreerast +fi +pip wheel --wheel-dir=./wheels /tmp/extensions/diffoctreerast + +# mip-splatting (diff-gaussian-rasterization) +if [ ! -d "/tmp/extensions/mip-splatting" ]; then + git clone https://github.com/autonomousvision/mip-splatting.git /tmp/extensions/mip-splatting +fi +pip wheel --wheel-dir=./wheels /tmp/extensions/mip-splatting/submodules/diff-gaussian-rasterization/ + +# 4. Pre-built Wheels (Kaolin, torch-scatter, spconv) +# These are already wheels, so we just download them to the folder +pip download --dest ./wheels \ + kaolin==0.16.0 -f https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.5.0_cu124.html + +pip download --dest ./wheels \ + spconv-cu124 + +pip download --dest ./wheels \ + torch-scatter -f https://data.pyg.org/whl/torch-2.5.0+cu124.html + +# 5. Python-PyCG +pip wheel --wheel-dir=./wheels 'python-pycg[all]' + +echo "All wheels built in ./wheels" diff --git a/demos/custom_utils.py b/demos/custom_utils.py new file mode 100644 index 0000000..da0561b --- /dev/null +++ b/demos/custom_utils.py @@ -0,0 +1,298 @@ +import os +import json +from subprocess import call, DEVNULL +import numpy as np +import shutil +import multiprocessing as mp +from lib.util.render import _install_blender, sphere_hammersley_sequence, BLENDER_PATH + +try: + mp.set_start_method("spawn", force=False) +except RuntimeError: + pass + +def _get_optimal_threads(num_workers): + """Calculate optimal CPU threads per Blender instance.""" + total_cores = os.cpu_count() or 4 + # Reserve 1 core for system/orchestration if possible + available_cores = max(1, total_cores - 1) + # Distribute remaining cores among workers + threads = max(1, available_cores // num_workers) + # Cap at 4 threads per instance since we are GPU bound anyway + # and too many threads just adds contention + return min(threads, 4) + +def _render_views_chunk(file_path, chunk_output_folder, views_chunk, blender_render_engine, cuda_device_id=None, threads=None): + """Render a subset of views into a chunk-specific folder.""" + os.makedirs(chunk_output_folder, exist_ok=True) + + # Prepare environment with GPU selection if provided + env = os.environ.copy() + if cuda_device_id is not None: + env["CUDA_VISIBLE_DEVICES"] = str(cuda_device_id) + + blender_exec = env.get('BLENDER_HOME', BLENDER_PATH) + if not os.path.exists(blender_exec) and blender_exec == BLENDER_PATH: + blender_exec = 'blender' # Fallback if specific path missing + + output_root = os.path.dirname(os.path.dirname(chunk_output_folder)) + blender_cache_dir = os.path.join(output_root, "blender_cache") + os.makedirs(blender_cache_dir, exist_ok=True) + env["XDG_CACHE_HOME"] = blender_cache_dir + + args = [ + blender_exec, '-b', + '-P', os.path.join(os.getcwd(), 'third_party/TRELLIS/dataset_toolkits', 'blender_script', 'render.py'), + 
'--', + '--views', json.dumps(views_chunk), + '--object', os.path.expanduser(file_path), + '--resolution', '512', + '--output_folder', chunk_output_folder, + '--engine', blender_render_engine, + '--save_mesh', + ] + + if threads: + args.extend(['--threads', str(threads)]) + + if file_path.endswith('.blend'): + args.insert(1, file_path) + + call(args, stdout=DEVNULL, stderr=DEVNULL, env=env) + +def _merge_blender_chunks(output_folder, chunk_infos, file_path, blender_render_engine): + """Merge chunk_* folders into the main output_folder and write transforms.json.""" + frames = [] + mesh_copied = False + + # Track global index for sequential renaming + global_idx = 0 + + for i, (chunk_path, chunk_views) in enumerate(chunk_infos): + if not os.path.isdir(chunk_path): + # Even if directory is missing (shouldn't happen due to retry), we advance index to keep alignment if possible + # But if directory missing, we likely failed. + # Let's assume retry logic works or we fail hard. + global_idx += len(chunk_views) + continue + + # Copy mesh.ply once (from first chunk that has it) + mesh_src = os.path.join(chunk_path, "mesh.ply") + mesh_dst = os.path.join(output_folder, "mesh.ply") + if not mesh_copied and os.path.exists(mesh_src): + shutil.copy2(mesh_src, mesh_dst) + mesh_copied = True + + chunk_transforms_path = os.path.join(chunk_path, "transforms.json") + + # Simple retry logic if chunk failed + if not os.path.exists(chunk_transforms_path): + print(f"[merge_chunks] Warning: missing transforms.json in {chunk_path}, re-rendering chunk.") + shutil.rmtree(chunk_path, ignore_errors=True) + # Use default 1 thread for retry to be safe + _render_views_chunk(file_path, chunk_path, chunk_views, blender_render_engine, threads=2) + + if not os.path.exists(chunk_transforms_path): + # If still missing, raise error + raise RuntimeError(f"Unable to generate transforms.json for {chunk_path}") + + with open(chunk_transforms_path, "r") as f: + chunk_data = json.load(f) + chunk_frames = chunk_data.get("frames", []) + + if not chunk_frames: + # Empty frames could mean render failure + raise RuntimeError(f"No frames found in {chunk_transforms_path}") + + frame_lookup = { + os.path.basename(frame.get("file_path", "")): frame for frame in chunk_frames + } + + # Sort files to ensure we map them to indices consistently if render.py uses ordered names (e.g. 000.png) + chunk_files = sorted([ + f for f in os.listdir(chunk_path) + if f.lower().endswith((".png", ".jpg", ".jpeg")) + ]) + + # We assume the sorted files correspond to the chunk_views in order + # If render.py produced '000.png', '001.png', ... they correspond to chunk_views[0], chunk_views[1]... + + for idx, img_name in enumerate(chunk_files): + src = os.path.join(chunk_path, img_name) + if img_name not in frame_lookup: + print(f"[merge_chunks] Warning: no metadata for {img_name} in {chunk_transforms_path}, skipping image.") + # os.remove(src) # Don't remove, just skip + continue + + # Rename to sequential number based on global index + # Format: 000.png, 001.png, etc. + # Or image_000.png if preferred, but adhering to existing project style (struct_renders uses 000.png) + # User request: "something like image_{num}.png" + # Interpreting as keeping the number sequential and using a clean format. + # Since structure renders used 000.png, I'll assume {num:03d}.png is the safe "image number" format. + # However, if I must follow "image_{num}.png" strictly, I would add the prefix. 
+ # I will use just the number to maintain compatibility with any dataset loaders expecting standard indices. + + # Actually, render.py usually outputs 000.png. + # The logic: global_idx tracks the start of this chunk. + # The current image is the idx-th image in this chunk. + + current_global_num = global_idx + idx + dst_name = f"{current_global_num:03d}.png" + dst = os.path.join(output_folder, dst_name) + + shutil.move(src, dst) + + frame = frame_lookup[img_name].copy() + frame["file_path"] = dst_name + frames.append(frame) + + # Advance global index by number of views in this chunk (or number of files processed?) + # Better to advance by chunk_views length to keep alignment with original views list + global_idx += len(chunk_views) + + shutil.rmtree(chunk_path) + + if not frames: + raise RuntimeError("No frames were merged when building transforms.json") + + transforms_path = os.path.join(output_folder, "transforms.json") + with open(transforms_path, "w") as f: + json.dump({"frames": frames}, f, indent=4) + +def _run_single_render(file_path, output_folder, views, blender_render_engine): + # For single render, we can use more CPU threads since we are the only process + threads = min(os.cpu_count() or 4, 8) + + output_root = os.path.dirname(output_folder) + blender_cache_dir = os.path.join(output_root, "blender_cache") + os.makedirs(blender_cache_dir, exist_ok=True) + env = os.environ.copy() + env["XDG_CACHE_HOME"] = blender_cache_dir + + blender_exec = os.environ.get('BLENDER_HOME', BLENDER_PATH) + if not os.path.exists(blender_exec) and blender_exec == BLENDER_PATH: + blender_exec = 'blender' # Fallback + + args = [ + # 'xvfb-run', + # "-s", "-screen 0 1920x1080x24", + blender_exec, '-b', + '-P', os.path.join(os.getcwd(), 'third_party/TRELLIS/dataset_toolkits', 'blender_script', 'render.py'), + '--', + '--views', json.dumps(views), + '--object', os.path.expanduser(file_path), + '--resolution', '512', + '--output_folder', output_folder, + '--engine', blender_render_engine, + '--save_mesh', + '--threads', str(threads) + ] + if file_path.endswith('.blend'): + args.insert(1, file_path) + + # call(args, stdout=DEVNULL, stderr=DEVNULL) + call(args, env=env) + + +def render_all_views(file_path, output_folder, num_views=150, blender_render_engine="CYCLES", num_workers=None): + _install_blender() + # Build camera {yaw, pitch, radius, fov} + yaws = [] + pitchs = [] + offset = (np.random.rand(), np.random.rand()) + for i in range(num_views): + y, p = sphere_hammersley_sequence(i, num_views, offset) + yaws.append(y) + pitchs.append(p) + radius = [2] * num_views + fov = [40 / 180 * np.pi] * num_views + views = [{'yaw': y, 'pitch': p, 'radius': r, 'fov': f} for y, p, r, f in zip(yaws, pitchs, radius, fov)] + + # Determine GPU availability using torch if available (safe check) + num_gpus = 0 + try: + import torch + if torch.cuda.is_available(): + num_gpus = torch.cuda.device_count() + except ImportError: + pass + + # Smart worker count logic + if num_workers is None: + if blender_render_engine == 'CYCLES': + if num_gpus > 0: + # To maximize VRAM usage and overlap CPU preparation with GPU rendering, + # we can run multiple concurrent Blender instances per GPU. + # For object-level scenes, 2-3 workers per GPU is usually the sweet spot. + # Too many will cause context thrashing; too few leaves VRAM idle. + WORKERS_PER_GPU = 3 + num_workers = num_gpus * WORKERS_PER_GPU + else: + # No GPU found: fallback to CPU. Parallelizing CPU might help if RAM permits. + # Cap at 4 to be safe. 
+ num_workers = min(os.cpu_count() or 4, 4) + else: + # For non-cycles (e.g. Eevee), we can be slightly more aggressive but still bound by GPU + if num_gpus > 0: + num_workers = num_gpus + else: + num_workers = min(os.cpu_count() or 4, 8) + + # Override: Force serial for small batches to avoid startup overhead + # 15 views is small enough that overhead of 2+ processes > gain + if len(views) < 30: + num_workers = 1 + + if num_workers > 1: + print(f"[render_all_views] Running with {num_workers} workers (GPUs detected: {num_gpus}).") + else: + print(f"[render_all_views] Running serially (GPUs detected: {num_gpus}).") + + if num_workers <= 1: + _run_single_render(file_path, output_folder, views, blender_render_engine) + else: + # Multi-process: split views into chunks and render in parallel + num_workers = min(num_workers, num_views) + view_chunks = np.array_split(views, num_workers) + + # Convert numpy arrays back to plain lists (json-serializable) + view_chunks = [list(chunk) for chunk in view_chunks] + chunk_infos = [] + + # Calculate optimal threads per worker + threads_per_worker = _get_optimal_threads(num_workers) + + with mp.Pool(processes=num_workers) as pool: + jobs = [] + for idx, chunk in enumerate(view_chunks): + chunk_output_folder = os.path.join(output_folder, f"chunk_{idx}") + chunk_infos.append((chunk_output_folder, chunk)) + + # Assign GPU ID round-robin if GPUs are available + assigned_gpu = None + if num_gpus > 0: + assigned_gpu = idx % num_gpus + + jobs.append( + pool.apply_async( + _render_views_chunk, + (file_path, chunk_output_folder, chunk, blender_render_engine, assigned_gpu, threads_per_worker), + ) + ) + for j in jobs: + j.get() + + _merge_blender_chunks(output_folder, chunk_infos, file_path, blender_render_engine) + + if os.path.exists(os.path.join(output_folder, 'transforms.json')): + # Return list of rendered image paths + out_renderviews = sorted( + [ + os.path.join(output_folder, f) + for f in os.listdir(output_folder) + if f.lower().endswith((".png", ".jpg", ".jpeg")) + ] + ) + return out_renderviews if out_renderviews else None + return None diff --git a/demos/demo_setup_colab.sh b/demos/demo_setup_colab.sh new file mode 100644 index 0000000..d5bcfbb --- /dev/null +++ b/demos/demo_setup_colab.sh @@ -0,0 +1,59 @@ +apt-get update -y +apt-get install -y xvfb +pip install pyvirtualdisplay + +pip install torch==2.5.0 torchvision==0.20.0 torchaudio==2.5.0 --index-url https://download.pytorch.org/whl/cu124 +pip install pillow imageio imageio-ffmpeg tqdm easydict opencv-python-headless scipy ninja rembg onnxruntime trimesh open3d xatlas pyvista pymeshfix igraph transformers tensorview -qq +pip install git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8 -qq +pip install flash-attn + +mkdir -p /tmp/extensions +git clone https://github.com/NVlabs/nvdiffrast.git /tmp/extensions/nvdiffrast +pip install /tmp/extensions/nvdiffrast + +mkdir -p /tmp/extensions +git clone --recurse-submodules https://github.com/JeffreyXiang/diffoctreerast.git /tmp/extensions/diffoctreerast +pip install /tmp/extensions/diffoctreerast + +# pip install kaolin==0.18.0 -f https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.8.0_cu128.html # CHECK CUDA VERSION BEFORE INSTALLING +pip install kaolin -f https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.5.0_cu124.html + +mkdir -p /tmp/extensions +git clone https://github.com/autonomousvision/mip-splatting.git /tmp/extensions/mip-splatting +pip install 
/tmp/extensions/mip-splatting/submodules/diff-gaussian-rasterization/ + +pip install spconv-cu124 + +pip install -U 'python-pycg[all]' +pip install psutil +pip install lightning==2.2 h5py yacs trimesh scikit-image loguru boto3 +pip install mesh2sdf tetgen pymeshlab plyfile einops libigl polyscope potpourri3d simple_parsing arrgh open3d +# pip install torch-scatter -f https://data.pyg.org/whl/torch-2.8.0+cu128.html +pip install torch-scatter -f https://data.pyg.org/whl/torch-2.5.0+cu124.html +sudo apt install libx11-6 libgl1 libxrender1 +pip install vtk + +pip install tetgen==0.6.4 +pip install numpy==1.26.4 + +mkdir -p ./models +wget https://huggingface.co/mikaelaangel/partfield-ckpt/resolve/main/model_objaverse.ckpt -O ../models/model_objaverse.ckpt + +export BLENDER_LINK='https://download.blender.org/release/Blender3.0/blender-3.0.1-linux-x64.tar.xz' +export BLENDER_INSTALLATION_PATH='/tmp' +export BLENDER_HOME="${BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64/blender" + +install_blender() { + if [ ! -f "$BLENDER_HOME" ]; then + echo "Installing Blender..." + sudo apt-get update + sudo apt-get install -y libxrender1 libxi6 libxkbcommon-x11-0 libsm6 + wget "$BLENDER_LINK" -P "$BLENDER_INSTALLATION_PATH" + tar -xvf "${BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64.tar.xz" -C "$BLENDER_INSTALLATION_PATH" + echo "Blender installed at $BLENDER_HOME" + else + echo "Blender already installed at $BLENDER_HOME" + fi +} + +install_blender \ No newline at end of file diff --git a/demos/packages.txt b/demos/packages.txt new file mode 100644 index 0000000..f4462fc --- /dev/null +++ b/demos/packages.txt @@ -0,0 +1,7 @@ +xvfb +libx11-6 +libgl1 +libxrender1 +libxi6 +libxkbcommon-x11-0 +libsm6 \ No newline at end of file diff --git a/demos/run_gradio_demo.py b/demos/run_gradio_demo.py new file mode 100644 index 0000000..60f370f --- /dev/null +++ b/demos/run_gradio_demo.py @@ -0,0 +1,633 @@ +import os +import sys +import spaces +import base64 +import tempfile +import hashlib +import re +import uuid +import shutil +from omegaconf import OmegaConf +from typing import Optional, Union, Tuple + +import gradio as gr + +GUIDEFLOW_YELLOW = "#ccad57" +GUIDEFLOW_BLUE = "#2459c2" +GUIDEFLOW_GREEN = "#8edf9f" + +os.environ["CUMM_DISABLE_JIT"] = "1" +os.environ["SPCONV_DISABLE_JIT"] = "1" +os.environ["TOKENIZERS_PARALLELISM"] = "false" + +# Add project root to Python path +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if project_root not in sys.path: + sys.path.insert(0, project_root) + +# --- START XVFB GLOBALLY --- +# Check if we are in a headless environment and DISPLAY is not set +if os.environ.get("DISPLAY") is None: + print("[INFO] Starting Xvfb for headless rendering...") + from pyvirtualdisplay import Display + + # Start Xvfb. visible=0 means headless. + # size=(1920, 1080) matches your previous xvfb-run settings. 
+ display = Display(visible=0, size=(1920, 1080)) + display.start() + + # Ensure DISPLAY env var is set for subprocesses + if os.environ.get("DISPLAY") is None: + # PyVirtualDisplay usually sets this, but fallback if needed + os.environ["DISPLAY"] = f":{display.display}" + + print(f"[INFO] Xvfb started on {os.environ['DISPLAY']}") + +# --- LOGO SETUP (BASE64) --- +def image_to_base64(image_path): + """Encodes an image to a base64 string for direct HTML embedding.""" + if not os.path.exists(image_path): + return "" + with open(image_path, "rb") as img_file: + encoded_string = base64.b64encode(img_file.read()).decode('utf-8') + return f"data:image/png;base64,{encoded_string}" + +logo_rel_path = os.path.join("demos", "assets", "logo.png") +logo_abs_path = os.path.join(project_root, logo_rel_path) +logo_src = image_to_base64(logo_abs_path) + +BLENDER_LINK = 'https://download.blender.org/release/Blender3.0/blender-3.0.1-linux-x64.tar.xz' +BLENDER_INSTALLATION_PATH = '/tmp' +BLENDER_PATH = f'{BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64/blender' + +def _install_blender(): + if not os.path.exists(BLENDER_PATH): + os.system('sudo apt-get update') + os.system('sudo apt-get install -y libxrender1 libxi6 libxkbcommon-x11-0 libsm6') + os.system(f'wget {BLENDER_LINK} -P {BLENDER_INSTALLATION_PATH}') + os.system(f'tar -xvf {BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64.tar.xz -C {BLENDER_INSTALLATION_PATH}') + +def _download_objaverse_ckpt(): + if not os.path.exists(os.path.join(project_root, 'models', 'model_objaverse.ckpt')): + os.makedirs(os.path.join(project_root, 'models'), exist_ok=True) + os.system(f'wget https://huggingface.co/mikaelaangel/partfield-ckpt/resolve/main/model_objaverse.ckpt -O {os.path.join(project_root, 'models', 'model_objaverse.ckpt')}') + +_install_blender() +_download_objaverse_ckpt() + +# Attempt import, handle failure gracefully for the demo shell +try: + from demos.pipeline_fn import GuideFlow3dPipeline +except ImportError: + GuideFlow3dPipeline = None + +pipe = None +cfg = None + +# Initialize Pipeline +try: + cfg_path = os.path.join(project_root, 'config', 'default.yaml') + if os.path.exists(cfg_path): + cfg = OmegaConf.load(cfg_path) + if GuideFlow3dPipeline: + pipe = GuideFlow3dPipeline().from_pretrained(cfg) +except Exception as e: + print(f"Error initializing pipeline: {e}") + pass + +output_dir = os.path.join(project_root, "all_outputs") +os.makedirs(output_dir, exist_ok=True) + +# --- MAPPING HELPERS --- + +# Dictionary mapping static thumbnail images to actual GLB files +THUMB_TO_GLB = { + # Structure Mesh Examples + "example_data/thumbs/structure/bench_chair.png": "example_data/structure_mesh/bench_chair.glb", + "example_data/thumbs/structure/cabinet.png": "example_data/structure_mesh/cabinet.glb", + "example_data/thumbs/structure/chair.png": "example_data/structure_mesh/chair.glb", + "example_data/thumbs/structure/giraffe.png": "example_data/structure_mesh/giraffe.glb", + "example_data/thumbs/structure/motorcycle.png": "example_data/structure_mesh/motorcycle.glb", + "example_data/thumbs/structure/plane.png": "example_data/structure_mesh/plane.glb", + + # Reference Appearance Mesh Examples + "example_data/thumbs/appearance/B01DA8LC0A.jpg": "example_data/appearance_mesh/B01DA8LC0A.glb", + "example_data/thumbs/appearance/B01DJH73Y6.png": "example_data/appearance_mesh/B01DJH73Y6.glb", + "example_data/thumbs/appearance/B0728KSP33.jpg": "example_data/appearance_mesh/B0728KSP33.glb", + "example_data/thumbs/appearance/B07B4YXNR8.jpg": 
"example_data/appearance_mesh/B07B4YXNR8.glb", + "example_data/thumbs/appearance/B07QC84LP1.png": "example_data/appearance_mesh/B07QC84LP1.glb", + "example_data/thumbs/appearance/B07QFRSC8M.png": "example_data/appearance_mesh/B07QFRSC8M_zup.glb", + "example_data/thumbs/appearance/B082QC7YKR.png": "example_data/appearance_mesh/B082QC7YKR_zup.glb" +} + +# Create a lookup based on basename to be robust against Gradio temp paths +THUMB_BASENAME_TO_GLB = {os.path.basename(k): v for k, v in THUMB_TO_GLB.items()} + +# Create reverse lookup for strict example detection +GLB_ABS_PATH_TO_NAME = {} +for k, v in THUMB_TO_GLB.items(): + abs_p = os.path.abspath(os.path.join(project_root, v)) + name_no_ext = os.path.splitext(os.path.basename(v))[0] + GLB_ABS_PATH_TO_NAME[abs_p] = name_no_ext + +def load_mesh_from_thumb(thumb_path: str) -> Optional[str]: + """Callback to return the GLB path associated with a thumbnail.""" + if not thumb_path: + return None + basename = os.path.basename(thumb_path) + return THUMB_BASENAME_TO_GLB.get(basename, None) + +def _ensure_glb_path(result: Union[str, bytes, os.PathLike]) -> str: + """Normalize various return types from fn() to a .glb file path.""" + if isinstance(result, (str, os.PathLike)): + path = os.fspath(result) + if not os.path.exists(path): + raise gr.Error("Returned mesh path does not exist.") + return path + if isinstance(result, (bytes, bytearray)): + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".glb") + tmp.write(result) + tmp.flush() + tmp.close() + return tmp.name + +def file_sha256(path: str, chunk_size: int = 1 << 20) -> str: + h = hashlib.sha256() + if not path or not os.path.exists(path): + return "nocontent" + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(chunk_size), b""): + h.update(chunk) + return h.hexdigest() + +def get_cache_folder(struct_mesh_path: str) -> str: + """Determines the output folder name based on the structure mesh.""" + struct_abs = os.path.abspath(struct_mesh_path) + + # 1. Check if it is a known example + # We check both absolute path and if the file hash matches a known example (to be safe) + # But simplest is just path check for now as examples are static files. + if struct_abs in GLB_ABS_PATH_TO_NAME: + return GLB_ABS_PATH_TO_NAME[struct_abs] + + # Also check if basename matches an example (for when gradio moves files around but keeps names?) + # Actually, safely relying on content hash is better for "New" files. + + current_hash = file_sha256(struct_mesh_path) + + # 2. Scan existing temp_* folders for matching hash + # We look for a struct_mesh.hash file inside. + if os.path.exists(output_dir): + for item in os.listdir(output_dir): + if item.startswith("temp_"): + folder_path = os.path.join(output_dir, item) + if os.path.isdir(folder_path): + hash_file = os.path.join(folder_path, "struct_mesh.hash") + if os.path.exists(hash_file): + try: + with open(hash_file, "r") as f: + stored_hash = f.read().strip() + if stored_hash == current_hash: + return item + except: + continue + + # 3. 
If not found, create new temp_{id} + new_id = uuid.uuid4().hex[:8] + return f"temp_{new_id}" + +# @spaces.GPU(duration=360) +def on_run( + guidance_mode_state: str, + app_struct_mesh: Optional[str], + app_ref_mesh: Optional[str], + app_ref_image: Optional[str], + sim_struct_mesh: Optional[str], + sim_ref_text: Optional[str], + sim_ref_image: Optional[str], + target_up_label: str, + reference_up_label: str, + cfg_strength: float, + num_steps: int, + learning_rate: float, +) -> Tuple[str, Optional[str]]: + + current_mode = guidance_mode_state.lower() + + if current_mode == "appearance": + target_mesh_path = app_struct_mesh + reference_mesh_path = app_ref_mesh + reference_image_path = app_ref_image + reference_text = None + else: + target_mesh_path = sim_struct_mesh + reference_text = sim_ref_text + reference_image_path = sim_ref_image + reference_mesh_path = None + + if not target_mesh_path: + raise gr.Error(f"Target Structure mesh is required for {current_mode} mode.") + + if pipe is None: + raise gr.Error("Pipeline not initialized. Check logs.") + + # --- Determine Output Directory --- + folder_name = get_cache_folder(target_mesh_path) + run_output_dir = os.path.join(output_dir, folder_name) + os.makedirs(run_output_dir, exist_ok=True) + + print(f"[INFO] Using output directory: {run_output_dir}") + + args = { + "structure_mesh": target_mesh_path, + "output_dir": run_output_dir, + "convert_target_yup_to_zup": target_up_label == "Z-up", + "convert_appearance_yup_to_zup": reference_up_label == "Z-up", + "appearance_mesh": reference_mesh_path, + "appearance_image": reference_image_path, + "appearance_text": (reference_text or "").strip(), + } + + fn = None + if current_mode == "appearance": + if not reference_mesh_path: + raise gr.Error("Appearance mode requires a reference mesh.") + fn = pipe.run_appearance + args.pop("appearance_text", None) + else: # similarity + if not reference_text and reference_image_path: + args["appearance_image"] = reference_image_path + args.pop("appearance_text", None) + args["app_type"] = "image" + elif reference_text and not reference_image_path: + args["appearance_text"] = reference_text + args.pop("appearance_image", None) + args["app_type"] = "text" + elif reference_text and reference_image_path: + raise gr.Error("Similarity mode requires a text prompt or reference image, but not both.") + else: + raise gr.Error("Similarity mode requires a text prompt or reference image.") + fn = pipe.run_self_similarity + args.pop("appearance_mesh", None) + args.pop("convert_appearance_yup_to_zup", None) + + if cfg: + updated_cfg = cfg # OmegaConf.load(cfg) + updated_cfg.cfg_strength = cfg_strength + updated_cfg.steps = num_steps + updated_cfg.learning_rate = learning_rate + pipe.cfg = updated_cfg + + try: + result_mesh, result_video = fn(**args) + mesh_path = _ensure_glb_path(result_mesh) + video_path = _ensure_glb_path(result_video) + return mesh_path, video_path + except Exception as e: + raise gr.Error(f"Generation failed: {str(e)}") + +# --- UI Styling & Header --- + +font_reg = os.path.join(project_root, "demos", "assets", "fonts", "avenir-next", "AvenirNextCyr-Regular.ttf") +font_bold = os.path.join(project_root, "demos", "assets", "fonts", "avenir-next", "AvenirNextCyr-Bold.ttf") +font_heavy = os.path.join(project_root, "demos", "assets", "fonts", "avenir-next", "AvenirNextCyr-Heavy.ttf") + +css = f""" +@font-face {{ + font-family: 'Avenir Next Regular'; + src: url('/file={font_reg}') format('truetype'); + font-weight: normal; + font-style: normal; +}} + @font-face {{ 
+ font-family: 'Avenir Next Bold'; + src: url('/file={font_bold}') format('truetype'); + font-weight: bold; + font-style: normal; + }} + +@font-face {{ + font-family: 'Avenir Next Heavy'; + src: url('/file={font_heavy}') format('truetype'); + font-weight: normal; + font-style: normal; +}} + +body, .gradio-container {{ + background-color: #ffffff !important; + color: #1f2937 !important; + font-family: 'Avenir Next Regular', sans-serif !important; +}} +.dark body, .dark .gradio-container {{ + background-color: #ffffff !important; + color: #1f2937 !important; + font-family: 'Avenir Next Regular', 'Inter', 'Roboto', sans-serif !important; +}} +/* Add specific components */ +.gradio-container button, +.gradio-container input, +.gradio-container textarea, +.gradio-container label, +.gradio-container span, +.gradio-container p, +.gradio-container h1, +.gradio-container h2, +.gradio-container h3, +.gradio-container h4, +.gradio-container h5, +.gradio-container h6 +{{ + font-family: 'Avenir Next Regular', sans-serif !important; +}} +.guideflow-header {{ + display: flex; + flex-direction: column; + align-items: center; + margin-bottom: 1rem; + transform: translateY(0.5rem); +}} +.logo-row {{ + display: flex; + align-items: baseline; + gap: 0.2rem; +}} +.logo-img {{ + height: 4rem; + width: auto; + transform: translateY(0.5rem); +}} +.title-uide, .title-flow, .title-3d {{ + font-family: 'Avenir Next Regular', sans-serif !important; + font-size: 3.5rem; + font-weight: normal; + line-height: 1.2; +}} +.title-uide {{ + background: linear-gradient(90deg, {GUIDEFLOW_GREEN}, {GUIDEFLOW_BLUE}); + -webkit-background-clip: text; + background-clip: text; + color: transparent; +}} +.title-flow {{ + color: {GUIDEFLOW_BLUE}; +}} +.title-3d {{ + color: {GUIDEFLOW_YELLOW}; +}} +.subtitle {{ + font-size: 1.5rem; + font-family: 'Avenir Next Regular', sans-serif; + color: {GUIDEFLOW_YELLOW}; + margin-top: 0.5rem; + text-align: center; +}} +.authors {{ + font-size: 1rem; + color: #334155; + margin-top: 0.5rem; +}} +.affiliations {{ + font-size: 0.9rem; + color: #6b7280; + margin-top: 0.2rem; +}} +.venue {{ + font-size: 1.1rem; + font-weight: 700; + color: #111827; + margin-top: 0.5rem; +}} +.links a {{ + color: {GUIDEFLOW_BLUE}; + text-decoration: none; + margin: 0 0.5rem; + font-weight: 500; +}} +.links a:hover {{ + text-decoration: underline; +}} +.demo-credit {{ + font-size: 0.9rem; + color: #64748b; + margin-top: 0.5rem; +}} +.instructions-container {{ + max-width: 800px; + margin: 0 auto 2rem auto; + text-align: left; + padding: 0 1rem; +}} +.input-row {{ align-items: flex-start; margin-bottom: 1rem; }} +""" + +HEADER_HTML = f""" +
+
+ GuideFlow3D Logo + uideFlow3D +
+
Optimization-Guided Rectified Flow For Appearance Transfer
+
+ Sayan Deb Sarkar1    + Sinisa Stekovic2    + Vincent Lepetit2    + Iro Armeni1 +
+
+ 1Stanford University    2ENPC, IP Paris +
+
NeurIPS 2025
+ +
+ Demo made by Suvaditya Mukherjee +
+
+""" + +INSTRUCTIONS_MD = """ +
+

Instructions

+
    +
  1. Upload a Structure Mesh (.glb): This defines the shape of your 3D object. We expect a y-up mesh, but feel free to convert it using the "Advanced Settings" below.
  2. +
  3. Choose Guidance Mode: Select "Self-Similarity" (Text) or "Appearance" (Mesh/Image) using the tabs.
  4. +
  5. Provide Reference: Enter a text prompt or upload a reference image/mesh.
  6. +
  7. Run: Click "Generate 3D Asset" to create the result.
  8. +
  9. Result: The result will be displayed in the viewer on the left, and a video will be generated on the right.
  10. +
+
+""" + +# Example Data +EX_STRUCT_THUMBS = [ + ["example_data/thumbs/structure/bench_chair.png"], + ["example_data/thumbs/structure/cabinet.png"], + ["example_data/thumbs/structure/chair.png"], + ["example_data/thumbs/structure/giraffe.png"], + ["example_data/thumbs/structure/motorcycle.png"], + ["example_data/thumbs/structure/plane.png"] +] + +EX_MESH_THUMBS = [ + ["example_data/thumbs/appearance/B01DA8LC0A.jpg"], + ["example_data/thumbs/appearance/B01DJH73Y6.png"], + ["example_data/thumbs/appearance/B0728KSP33.jpg"], + ["example_data/thumbs/appearance/B07B4YXNR8.jpg"], + ["example_data/thumbs/appearance/B07QC84LP1.png"], + ["example_data/thumbs/appearance/B07QFRSC8M.png"], + ["example_data/thumbs/appearance/B082QC7YKR.png"] +] + +EX_IMG = [ + "example_data/appearance_image/B01DA8LC0A.jpg", + "example_data/appearance_image/B01DJH73Y6.png", + "example_data/appearance_image/B0728KSP33.jpg", + "example_data/appearance_image/B07B4YXNR8.jpg", + "example_data/appearance_image/B07QC84LP1.png", + "example_data/appearance_image/B07QFRSC8M.jpg", + "example_data/appearance_image/B082QC7YKR.png" +] +EX_TEXT = ["a wooden chair", "a marble statue", "A black metal-framed bed with a curved headboard, white rectangular mattress, and two white rectangular pillows.", "Rectangular wooden cabinet with reddish-brown finish, standing on four short legs. Features two drawers (upper larger, lower smaller) and an open shelf below. Back has a power socket extension and cable, ideal for electronics."] + +with gr.Blocks( + title="GuideFlow3D", +) as demo: + + gr.HTML(HEADER_HTML) + gr.HTML(INSTRUCTIONS_MD) + + guidance_mode_state = gr.State(value="Similarity") + + with gr.Tabs() as guidance_tabs: + + # --- TAB 1: SELF-SIMILARITY (LEFT) --- + with gr.TabItem("Self-Similarity", id="tab_similarity") as tab_sim: + gr.Markdown("### Similarity Editing Inputs") + + with gr.Row(elem_classes="input-row"): + with gr.Column(scale=3): + sim_struct_mesh = gr.Model3D(label="Structure Mesh (.glb)", interactive=True, height=300) + with gr.Column(scale=2): + sim_struct_hidden = gr.Image(type="filepath", visible=False) + # sim_struct_mesh_examples = gr.Examples(examples=EX_STRUCT_THUMBS, inputs=sim_struct_hidden, label="Structure Examples") + sim_struct_mesh_examples = gr.Examples( + examples=EX_STRUCT_THUMBS, + inputs=sim_struct_hidden, + outputs=sim_struct_mesh, # Target the 3D viewer directly + fn=load_mesh_from_thumb, # Run the conversion function + run_on_click=True, # Force execution on click + label="Structure Examples" + ) + + gr.Markdown("> **_NOTE:_** Please use either a reference image or a reference text prompt, but not both.") + + with gr.Row(elem_classes="input-row"): + with gr.Column(scale=3): + sim_ref_image = gr.Image(label="Reference Appearance Image", type="filepath", height=250) + with gr.Column(scale=2): + gr.Examples(examples=EX_IMG, inputs=sim_ref_image, label="Image Examples") + + with gr.Row(elem_classes="input-row"): + with gr.Column(scale=3): + sim_ref_text = gr.Textbox(label="Reference Text Prompt", placeholder="Describe the appearance...", lines=2) + with gr.Column(scale=2): + gr.Examples(examples=EX_TEXT, inputs=sim_ref_text, label="Prompt Examples") + + # --- TAB 2: APPEARANCE (RIGHT) --- + with gr.TabItem("Appearance", id="tab_appearance") as tab_app: + gr.Markdown("### Appearance Transfer Inputs") + + with gr.Row(elem_classes="input-row"): + with gr.Column(scale=3): + app_struct_mesh = gr.Model3D(label="Structure Mesh (.glb)", interactive=True, height=300) + with gr.Column(scale=2): + app_struct_hidden 
= gr.Image(type="filepath", visible=False) + # app_struct_mesh_examples = gr.Examples(examples=EX_STRUCT_THUMBS, inputs=app_struct_hidden, label="Structure Examples") + app_struct_mesh_examples = gr.Examples( + examples=EX_STRUCT_THUMBS, + inputs=app_struct_hidden, + outputs=app_struct_mesh, # Target the 3D viewer directly + fn=load_mesh_from_thumb, # Run the conversion function + run_on_click=True, # Force execution on click + label="Structure Examples" + ) + + with gr.Row(elem_classes="input-row"): + with gr.Column(scale=3): + app_ref_image = gr.Image(label="Reference Appearance Image", type="filepath", height=250) + with gr.Column(scale=2): + gr.Examples(examples=EX_IMG, inputs=app_ref_image, label="Image Examples") + + with gr.Row(elem_classes="input-row"): + with gr.Column(scale=3): + app_ref_mesh = gr.Model3D(label="Reference Appearance Mesh (.glb)", interactive=True, height=300) + with gr.Column(scale=2): + app_ref_mesh_hidden = gr.Image(type="filepath", visible=False) + # app_ref_mesh_examples = gr.Examples(examples=EX_MESH_THUMBS, inputs=app_ref_mesh_hidden, label="Mesh Examples") + app_ref_mesh_examples = gr.Examples( + examples=EX_MESH_THUMBS, + inputs=app_ref_mesh_hidden, + outputs=app_ref_mesh, # Target the 3D viewer directly + fn=load_mesh_from_thumb, # Run the conversion function + run_on_click=True, # Force execution on click + label="Mesh Examples" + ) + + # --- ADVANCED SETTINGS --- + with gr.Accordion("Advanced Settings", open=False): + with gr.Row(): + target_up = gr.Radio(["Y-up", "Z-up"], value="Y-up", label="Structure Mesh Up-Axis") + reference_up = gr.Radio(["Y-up", "Z-up"], value="Y-up", label="Appearance Mesh Up-Axis") + + with gr.Row(): + cfg_strength = gr.Slider(0.1, 10.0, value=5.0, step=0.1, label="CFG Strength") + num_steps = gr.Slider(50, 1000, value=300, step=50, label="Diffusion Steps") + learning_rate = gr.Number(value=5e-4, label="Learning Rate") + + # --- RUN BUTTON --- + with gr.Row(): + run_btn = gr.Button("Generate 3D Asset", variant="primary", size="lg") + + # --- OUTPUTS --- + gr.Markdown("### Results") + with gr.Row(): + with gr.Column(): + output_model = gr.Model3D(label="Output Mesh", interactive=False, clear_color=[1.0, 1.0, 1.0, 0.0]) + with gr.Column(): + output_video = gr.Video(label="Output Video", autoplay=True, loop=True, interactive=False) + + tab_sim.select(lambda: "Similarity", outputs=guidance_mode_state) + tab_app.select(lambda: "Appearance", outputs=guidance_mode_state) + + run_btn.click( + fn=on_run, + inputs=[ + guidance_mode_state, + app_struct_mesh, app_ref_mesh, app_ref_image, + sim_struct_mesh, sim_ref_text, sim_ref_image, + target_up, reference_up, cfg_strength, num_steps, learning_rate + ], + outputs=[output_model, output_video] + ) + + demo.load(None, None, None, js="() => { document.body.classList.remove('dark'); }") + +if __name__ == "__main__": + # demo.queue().launch(share=True, allowed_paths=[project_root], mcp_server=True) # Useful for Colab runs + demo.queue().launch( + allowed_paths=[project_root], + mcp_server=True, + css=css, + theme=gr.themes.Default( + primary_hue="sky", + secondary_hue="lime" + ).set( + body_background_fill="white", + background_fill_primary="white", + block_background_fill="white", + input_background_fill="#f9fafb" + ) + ) diff --git a/lib/util/generation.py b/lib/util/generation.py index e58f576..5a26a82 100644 --- a/lib/util/generation.py +++ b/lib/util/generation.py @@ -138,5 +138,5 @@ def decode_slat(generation_pipeline, feats, coords, out_meshpath, out_gspath): 
mesh_textured.export(out_meshpath) # Render the outputs - video = render_utils.render_video(outputs['gaussian'][0])['color'] + video = render_utils.render_video(outputs['gaussian'][0], bg_color=[255, 255, 255])['color'] imageio.mimsave(out_gspath, video, fps=30) \ No newline at end of file diff --git a/lib/util/render.py b/lib/util/render.py index db946a7..42fd2ef 100644 --- a/lib/util/render.py +++ b/lib/util/render.py @@ -12,7 +12,10 @@ def _install_blender(): os.system('sudo apt-get update') os.system('sudo apt-get install -y libxrender1 libxi6 libxkbcommon-x11-0 libsm6') os.system(f'wget {BLENDER_LINK} -P {BLENDER_INSTALLATION_PATH}') - os.system(f'tar -xvf {BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64.tar.xz -C {BLENDER_INSTALLATION_PATH}') + os.system(f'tar -xf {BLENDER_INSTALLATION_PATH}/blender-3.0.1-linux-x64.tar.xz -C {BLENDER_INSTALLATION_PATH}') + print(f'Blender installed at {BLENDER_PATH}') + else: + print(f'Blender already installed at {BLENDER_PATH}') def render_all_views(file_path, output_folder, num_views=150): _install_blender() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..900a82a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,55 @@ +--extra-index-url https://download.pytorch.org/whl/cu124 +--find-links https://data.pyg.org/whl/torch-2.5.0+cu124.html +--find-links https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.5.0_cu124.html + +torch==2.5.0+cu124 +torchvision==0.20.0+cu124 +torchaudio==2.5.0+cu124 +pillow==11.3.0 +imageio==2.37.2 +imageio-ffmpeg==0.6.0 +tqdm==4.67.1 +easydict==1.13 +opencv-python-headless==4.12.0.88 +scipy==1.16.3 +ninja==1.13.0 +rembg==2.0.68 +onnxruntime==1.23.2 +open3d==0.19.0 +xatlas==0.0.11 +pyvirtualdisplay==3.0 +pyvista==0.46.4 +pymeshfix==0.17.1 +igraph==1.0.0 +transformers==4.57.2 +tensorview==0.2.0 +git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8 +https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.5.4/flash_attn-2.8.3+cu124torch2.5-cp312-cp312-linux_x86_64.whl?download=true +https://huggingface.co/spaces/gradient-spaces/GuideFlow3D/resolve/main/demos/assets/wheels/diff_gaussian_rasterization-0.0.0-cp312-cp312-linux_x86_64.whl?download=true +https://huggingface.co/spaces/gradient-spaces/GuideFlow3D/resolve/main/demos/assets/wheels/diffoctreerast-0.0.0-cp312-cp312-linux_x86_64.whl?download=true +https://huggingface.co/spaces/gradient-spaces/GuideFlow3D/resolve/main/demos/assets/wheels/nvdiffrast-0.3.5-py3-none-any.whl?download=true +python-pycg[all] +spconv-cu124==2.3.8 +psutil==5.9.5 +lightning==2.2 +h5py==3.15.1 +yacs==0.1.8 +trimesh==4.10.0 +scikit-image==0.25.2 +loguru==0.7.3 +boto3==1.41.5 +mesh2sdf==1.1.0 +tetgen==0.6.4 +pymeshlab==2025.7 +plyfile==1.1.3 +einops==0.8.1 +libigl==2.6.1 +polyscope==2.5.0 +potpourri3d==1.3 +simple_parsing==0.1.7 +arrgh==1.0.0 +open3d==0.19.0 +torch-scatter==2.1.2+pt25cu124 +kaolin==0.18.0 +vtk==9.5.2 +tetgen==0.6.4 \ No newline at end of file diff --git a/third_party/TRELLIS/dataset_toolkits/blender_script/render.py b/third_party/TRELLIS/dataset_toolkits/blender_script/render.py index ff2e918..ba0010b 100644 --- a/third_party/TRELLIS/dataset_toolkits/blender_script/render.py +++ b/third_party/TRELLIS/dataset_toolkits/blender_script/render.py @@ -33,7 +33,7 @@ 'TARGA': 'tga' } -def init_render(engine='CYCLES', resolution=512, geo_mode=False): +def init_render(engine='CYCLES', resolution=512, geo_mode=False, threads=None): bpy.context.scene.render.engine = engine 
bpy.context.scene.render.resolution_x = resolution bpy.context.scene.render.resolution_y = resolution @@ -41,6 +41,11 @@ def init_render(engine='CYCLES', resolution=512, geo_mode=False): bpy.context.scene.render.image_settings.file_format = 'PNG' bpy.context.scene.render.image_settings.color_mode = 'RGBA' bpy.context.scene.render.film_transparent = True + bpy.context.scene.render.use_persistent_data = True + + if threads is not None and threads > 0: + bpy.context.scene.render.threads_mode = 'FIXED' + bpy.context.scene.render.threads = threads bpy.context.scene.cycles.device = 'GPU' bpy.context.scene.cycles.samples = 128 if not geo_mode else 1 @@ -51,9 +56,40 @@ def init_render(engine='CYCLES', resolution=512, geo_mode=False): bpy.context.scene.cycles.transparent_max_bounces = 3 if not geo_mode else 0 bpy.context.scene.cycles.transmission_bounces = 3 if not geo_mode else 1 bpy.context.scene.cycles.use_denoising = True + # bpy.context.scene.cycles.denoiser = 'OPTIX' + bpy.context.scene.cycles.denoiser = 'OPENIMAGEDENOISE' + bpy.context.scene.cycles.noise_threshold = 0.05 + + # Force single tile if possible to maximize GPU throughput for small images + if hasattr(bpy.context.scene.cycles, 'use_auto_tile'): + bpy.context.scene.cycles.use_auto_tile = False + if hasattr(bpy.context.scene.cycles, 'tile_size'): + bpy.context.scene.cycles.tile_size = resolution - bpy.context.preferences.addons['cycles'].preferences.get_devices() - bpy.context.preferences.addons['cycles'].preferences.compute_device_type = 'CUDA' + # Device setup + cycles_prefs = bpy.context.preferences.addons['cycles'].preferences + cycles_prefs.get_devices() + + cuda_devices = [d for d in cycles_prefs.devices if d.type == 'CUDA'] + optix_devices = [d for d in cycles_prefs.devices if d.type == 'OPTIX'] + + # Check environment variable to optionally disable GPU + use_gpu = (len(cuda_devices) > 0 or len(optix_devices) > 0) and os.environ.get("CUDA_VISIBLE_DEVICES") != "-1" + + if use_gpu: + bpy.context.scene.cycles.device = 'GPU' + if len(optix_devices) > 0: + cycles_prefs.compute_device_type = 'OPTIX' + else: + cycles_prefs.compute_device_type = 'CUDA' + + for device in cycles_prefs.devices: + if device.type in {'CUDA', 'OPTIX'}: + device.use = True + print(f"[init_render] Using GPU: {cycles_prefs.compute_device_type}") + else: + bpy.context.scene.cycles.device = 'CPU' + print("[init_render] GPU not found or disabled, using CPU.") def init_nodes(save_depth=False, save_normal=False, save_albedo=False, save_mist=False): if not any([save_depth, save_normal, save_albedo, save_mist]): @@ -417,7 +453,7 @@ def main(arg): os.makedirs(arg.output_folder, exist_ok=True) # Initialize context - init_render(engine=arg.engine, resolution=arg.resolution, geo_mode=arg.geo_mode) + init_render(engine=arg.engine, resolution=arg.resolution, geo_mode=arg.geo_mode, threads=arg.threads) outputs, spec_nodes = init_nodes( save_depth=arg.save_depth, save_normal=arg.save_normal, @@ -521,6 +557,7 @@ def main(arg): parser.add_argument('--save_mist', action='store_true', help='Save the mist distance maps.') parser.add_argument('--split_normal', action='store_true', help='Split the normals of the mesh.') parser.add_argument('--save_mesh', action='store_true', help='Save the mesh as a .ply file.') + parser.add_argument('--threads', type=int, default=None, help='Number of CPU threads to use.') argv = sys.argv[sys.argv.index("--") + 1:] args = parser.parse_args(argv) diff --git a/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/ablate_L_dev.jpg 
b/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/ablate_L_dev.jpg index 461bd1c..656e7c5 100644 Binary files a/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/ablate_L_dev.jpg and b/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/ablate_L_dev.jpg differ diff --git a/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/block_final.png b/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/block_final.png index 8b19771..07a18ad 100644 Binary files a/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/block_final.png and b/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/block_final.png differ diff --git a/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/block_init.png b/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/block_init.png index 3c682b9..aadc74a 100644 Binary files a/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/block_init.png and b/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/block_init.png differ diff --git a/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/teaser_top.png b/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/teaser_top.png index 719ddc2..5ae1289 100644 Binary files a/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/teaser_top.png and b/third_party/TRELLIS/trellis/representations/mesh/flexicubes/images/teaser_top.png differ diff --git a/third_party/TRELLIS/trellis/utils/elastic_utils.py b/third_party/TRELLIS/trellis/utils/elastic_utils.py new file mode 100644 index 0000000..e9ebece --- /dev/null +++ b/third_party/TRELLIS/trellis/utils/elastic_utils.py @@ -0,0 +1,228 @@ +from abc import abstractmethod +from contextlib import contextmanager +from typing import Tuple +import torch +import torch.nn as nn +import numpy as np + + +class MemoryController: + """ + Base class for memory management during training. + """ + + _last_input_size = None + _last_mem_ratio = [] + + @contextmanager + def record(self): + pass + + def update_run_states(self, input_size=None, mem_ratio=None): + if self._last_input_size is None: + self._last_input_size = input_size + elif self._last_input_size!= input_size: + raise ValueError(f'Input size should not change for different ElasticModules.') + self._last_mem_ratio.append(mem_ratio) + + @abstractmethod + def get_mem_ratio(self, input_size): + pass + + @abstractmethod + def state_dict(self): + pass + + @abstractmethod + def log(self): + pass + + +class LinearMemoryController(MemoryController): + """ + A simple controller for memory management during training. 
+ The memory usage is modeled as a linear function of: + - the number of input parameters + - the ratio of memory the model use compared to the maximum usage (with no checkpointing) + memory_usage = k * input_size * mem_ratio + b + The controller keeps track of the memory usage and gives the + expected memory ratio to keep the memory usage under a target + """ + def __init__( + self, + buffer_size=1000, + update_every=500, + target_ratio=0.8, + available_memory=None, + max_mem_ratio_start=0.1, + params=None, + device=None + ): + self.buffer_size = buffer_size + self.update_every = update_every + self.target_ratio = target_ratio + self.device = device or torch.cuda.current_device() + self.available_memory = available_memory or torch.cuda.get_device_properties(self.device).total_memory / 1024**3 + + self._memory = np.zeros(buffer_size, dtype=np.float32) + self._input_size = np.zeros(buffer_size, dtype=np.float32) + self._mem_ratio = np.zeros(buffer_size, dtype=np.float32) + self._buffer_ptr = 0 + self._buffer_length = 0 + self._params = tuple(params) if params is not None else (0.0, 0.0) + self._max_mem_ratio = max_mem_ratio_start + self.step = 0 + + def __repr__(self): + return f'LinearMemoryController(target_ratio={self.target_ratio}, available_memory={self.available_memory})' + + def _add_sample(self, memory, input_size, mem_ratio): + self._memory[self._buffer_ptr] = memory + self._input_size[self._buffer_ptr] = input_size + self._mem_ratio[self._buffer_ptr] = mem_ratio + self._buffer_ptr = (self._buffer_ptr + 1) % self.buffer_size + self._buffer_length = min(self._buffer_length + 1, self.buffer_size) + + @contextmanager + def record(self): + torch.cuda.reset_peak_memory_stats(self.device) + self._last_input_size = None + self._last_mem_ratio = [] + yield + self._last_memory = torch.cuda.max_memory_allocated(self.device) / 1024**3 + self._last_mem_ratio = sum(self._last_mem_ratio) / len(self._last_mem_ratio) + self._add_sample(self._last_memory, self._last_input_size, self._last_mem_ratio) + self.step += 1 + if self.step % self.update_every == 0: + self._max_mem_ratio = min(1.0, self._max_mem_ratio + 0.1) + self._fit_params() + + def _fit_params(self): + memory_usage = self._memory[:self._buffer_length] + input_size = self._input_size[:self._buffer_length] + mem_ratio = self._mem_ratio[:self._buffer_length] + + x = input_size * mem_ratio + y = memory_usage + k, b = np.polyfit(x, y, 1) + self._params = (k, b) + # self._visualize() + + def _visualize(self): + import matplotlib.pyplot as plt + memory_usage = self._memory[:self._buffer_length] + input_size = self._input_size[:self._buffer_length] + mem_ratio = self._mem_ratio[:self._buffer_length] + k, b = self._params + + plt.scatter(input_size * mem_ratio, memory_usage, c=mem_ratio, cmap='viridis') + x = np.array([0.0, 20000.0]) + plt.plot(x, k * x + b, c='r') + plt.savefig(f'linear_memory_controller_{self.step}.png') + plt.cla() + + def get_mem_ratio(self, input_size): + k, b = self._params + if k == 0: return np.random.rand() * self._max_mem_ratio + pred = (self.available_memory * self.target_ratio - b) / (k * input_size) + return min(self._max_mem_ratio, max(0.0, pred)) + + def state_dict(self): + return { + 'params': self._params, + } + + def load_state_dict(self, state_dict): + self._params = tuple(state_dict['params']) + + def log(self): + return { + 'params/k': self._params[0], + 'params/b': self._params[1], + 'memory': self._last_memory, + 'input_size': self._last_input_size, + 'mem_ratio': self._last_mem_ratio, + } + + +class 
ElasticModule(nn.Module): + """ + Module for training with elastic memory management. + """ + def __init__(self): + super().__init__() + self._memory_controller: MemoryController = None + + @abstractmethod + def _get_input_size(self, *args, **kwargs) -> int: + """ + Get the size of the input data. + + Returns: + int: The size of the input data. + """ + pass + + @abstractmethod + def _forward_with_mem_ratio(self, *args, mem_ratio=0.0, **kwargs) -> Tuple[float, Tuple]: + """ + Forward with a given memory ratio. + """ + pass + + def register_memory_controller(self, memory_controller: MemoryController): + self._memory_controller = memory_controller + + def forward(self, *args, **kwargs): + if self._memory_controller is None or not torch.is_grad_enabled() or not self.training: + _, ret = self._forward_with_mem_ratio(*args, **kwargs) + else: + input_size = self._get_input_size(*args, **kwargs) + mem_ratio = self._memory_controller.get_mem_ratio(input_size) + mem_ratio, ret = self._forward_with_mem_ratio(*args, mem_ratio=mem_ratio, **kwargs) + self._memory_controller.update_run_states(input_size, mem_ratio) + return ret + + +class ElasticModuleMixin: + """ + Mixin for training with elastic memory management. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._memory_controller: MemoryController = None + + @abstractmethod + def _get_input_size(self, *args, **kwargs) -> int: + """ + Get the size of the input data. + + Returns: + int: The size of the input data. + """ + pass + + @abstractmethod + @contextmanager + def with_mem_ratio(self, mem_ratio=1.0) -> float: + """ + Context manager for training with a reduced memory ratio compared to the full memory usage. + + Returns: + float: The exact memory ratio used during the forward pass. + """ + pass + + def register_memory_controller(self, memory_controller: MemoryController): + self._memory_controller = memory_controller + + def forward(self, *args, **kwargs): + if self._memory_controller is None or not torch.is_grad_enabled() or not self.training: + ret = super().forward(*args, **kwargs) + else: + input_size = self._get_input_size(*args, **kwargs) + mem_ratio = self._memory_controller.get_mem_ratio(input_size) + with self.with_mem_ratio(mem_ratio) as exact_mem_ratio: + ret = super().forward(*args, **kwargs) + self._memory_controller.update_run_states(input_size, exact_mem_ratio) + return ret \ No newline at end of file
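+
+# ---------------------------------------------------------------------------
+# Illustrative usage sketch (editorial note; `ToyMLP` / `ElasticToyMLP` are
+# hypothetical names, not part of TRELLIS). A module opts into elastic memory
+# management by (1) placing ElasticModuleMixin in front of a module class that
+# already defines forward(), (2) implementing _get_input_size() and
+# with_mem_ratio(), and (3) registering a MemoryController. Assumes a CUDA
+# device, since LinearMemoryController reads torch.cuda memory statistics.
+#
+#   class ToyMLP(nn.Module):
+#       def __init__(self, dim=256):
+#           super().__init__()
+#           self.net = nn.Sequential(nn.Linear(dim, dim), nn.GELU(), nn.Linear(dim, dim))
+#       def forward(self, x):
+#           return self.net(x)
+#
+#   class ElasticToyMLP(ElasticModuleMixin, ToyMLP):
+#       def _get_input_size(self, x):
+#           return x.shape[0] * x.shape[1]   # proxy for the activation footprint
+#
+#       @contextmanager
+#       def with_mem_ratio(self, mem_ratio=1.0):
+#           # A real model would enable gradient checkpointing on a fraction of
+#           # its blocks according to mem_ratio; the toy block has nothing to
+#           # trade off, so it simply reports the ratio it was given.
+#           yield mem_ratio
+#
+#   model = ElasticToyMLP().cuda().train()
+#   controller = LinearMemoryController(target_ratio=0.8)
+#   model.register_memory_controller(controller)
+#
+#   x = torch.randn(8, 256, device="cuda")
+#   with controller.record():            # tracks peak memory for this step
+#       model(x).sum().backward()        # forward consults the controller for mem_ratio
+# ---------------------------------------------------------------------------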