diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..2500f2e
--- /dev/null
+++ b/.env.example
@@ -0,0 +1 @@
+REPLICATE_API_TOKEN=
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index d993b3d..3d2f9fb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -398,7 +398,7 @@ FodyWeavers.xsd
 *.sln.iml
 
 # env files from the flux server
-FLUX_inpainting_server/env/lib
+inpainting/flux_inpainting_server/env/lib
 tmp*
 
 # SLURM output
@@ -411,4 +411,13 @@ wandb/*
 *.pt
 *.pth
 
-.vscode/
\ No newline at end of file
+.vscode/
+
+# Environment variables 
+.env
+
+# Output scenes 
+scenes 
+
+# Blender 
+blender-3.6*/
\ No newline at end of file
diff --git a/README.md b/README.md
index 34423bd..451bf5c 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ SynCity generates complex and immersive 3D worlds from text prompts and does not
 
 ### Prerequisites
 * **System**: The code was tested on Ubuntu 22.04. We expect it to run on other Linux-based distributions too.
-* **Hardware**: An NVIDIA GPU with at least **48GB** of memory is required. We have used A40 and A6000 GPUs.
+* **Hardware**: If the inpainting server is deployed locally, an NVIDIA GPU with at least **48GB** of memory is required. We have used A40 and A6000 GPUs. If you use the inpainting service from replicate, you will need a GPU with at least **16GB** of memory for trellis generation.
 * **Software**:
     - The [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive) is needed to compile certain submodules. We have tested CUDA versions 11.8 and 12.4.
     - [Conda](https://docs.anaconda.com/miniconda/install/#quick-command-line-install) is used to create the environment to run the code. This environment uses Python version 3.10.
@@ -33,10 +33,23 @@ source ./setup.sh --new-env --basic --xformers --diffoctreerast --spconv --mipga
 ```
 Make sure to have set the environment variable `CUDA_HOME`, which should point to your CUDA Toolkit installation. If you run into issues while running this setup script, please refer to [the README in the TRELLIS](https://github.com/microsoft/TRELLIS/blob/main/README.md#installation-steps) repository, which provides additional guidance.
 
-3. Set up the FLUX inpainting server:
+
+3. Set up the FLUX inpainting backend:
+
+**Option1**:  Set up the FLUX inpainting server locally (the server will require around **30GB+ VRAM**):
 ```
 ./inpainting_server.sh --install
 ```
+**Option2**: Use replicate web deployment (pay as you go, about 0.03$ for every image and 0.4$ every 3x3 scene). 
+```
+cp .env.example .env 
+
+# fill in your replicate id, which can be obtained here
+# https://replicate.com/account/api-tokens
+
+# install requirements for replicate
+pip install dotenv replicate
+```
 
 4. Download [Blender 3.6.19](https://www.blender.org/download/release/Blender3.6/blender-3.6.19-linux-x64.tar.xz), extract it into the root directory of this project, and make sure `blender-3.6.19-linux-x64/blender` can be executed on your system.
 
@@ -51,7 +64,11 @@ The process to generate a world is split into two straightforward steps.
 ### Step 1: Generating Tiles
 The tiles are generated using an instruction file, which contain the prompts to generate each tile (see some examples in the `instructions` folder). To generate a set of tiles that will be saved to `scenes/solarpunk`, run:
 ```
+# option1: locally deploy the inpainting server
 python run_pipeline.py --instructions instructions/3x3/solarpunk.json --prefix scenes/solarpunk --gradio_url=http://127.0.0.1:7860
+
+# option2: use the replicate inpainting service
+python run_pipeline.py --instructions instructions/3x3/solarpunk.json --prefix scenes/solarpunk --parallel=False --inpainter_type=flux_replicate
 ```
 
 This script will parallelize tile generation where possible if multiple GPUs are available. If the script is stalling for longer than a minute, consider running the tile generation synchronously (`--parallel=False`). Furthermore, if a single tile keeps being regenerated, consider interrupting the script and replacing the offending tile's prompt. Then, restart the script with `--skip_existing=True` to ensure it will not overwrite existing tiles. Alternatively, see the ["Advanced Usage"](#advanced_usage) section on how to adjust the tile rejection criteria.
@@ -59,7 +76,11 @@ This script will parallelize tile generation where possible if multiple GPUs are
 ### Step 2: Blending Tiles
 To create smooth transitions between tiles and refine their boundary regions, run the blending script:
 ```
+# option1: locally deploy the inpainting server
 python blend_gaussians.py --compute_rescaled --stitch_images --stitch_slats --gradio_url=http://127.0.0.1:7860 --prefix scenes/solarpunk
+
+# option2: use the replicate inpainting service
+python blend_gaussians.py --compute_rescaled --stitch_images --stitch_slats --inpainter_type=flux_replicate --prefix scenes/solarpunk
 ```
 
 This script will create a `.ply` file with the Gaussians of the entire grid as well as a video rendering.
diff --git a/blend_gaussians.py b/blend_gaussians.py
index 680b94d..5fbe1f8 100644
--- a/blend_gaussians.py
+++ b/blend_gaussians.py
@@ -12,6 +12,7 @@
 import torch.nn.functional as F
 from PIL import Image
 from tqdm.auto import tqdm
+from typing import Literal
 
 import trellis.models as models
 from tile_cutting import z_preserving_crop
@@ -477,6 +478,7 @@ def merge_gaussians(
         stitch_images: bool = False, 
         stitch_slats: bool = False, 
         use_cached: bool = False,
+        inpainter_type: Literal["flux_local", "flux_replicate", "sdxl_replicate"] = "flux_local",
         gradio_url='http://127.0.0.1:7860',
         blender_path: str = 'blender-3.6.19-linux-x64/blender',
         seed: int = 429
@@ -577,9 +579,9 @@ def merge_gaussians(
         rescaled_tiles = dill.load(open(os.path.join(grid_path, 'rescaled_tiles.pkl'), 'rb'))
 
     if stitch_images:
-        from FLUX_inpainting_server.inpaint import Inpainter
+        from inpainting import Inpainter
         import time
-        inpainter = Inpainter(gradio_url)
+        inpainter = Inpainter(inpainter_type, gradio_url)
 
         VIEW_TYPE = 'zoom_out'
         prompts = json.load(open(os.path.join(grid_path, 'instructions.json')))
diff --git a/inpainting/__init__.py b/inpainting/__init__.py
new file mode 100644
index 0000000..f70bfdd
--- /dev/null
+++ b/inpainting/__init__.py
@@ -0,0 +1,22 @@
+from dotenv import load_dotenv
+from typing import Literal
+from PIL import Image
+from .flux_inpainter_server.inpaint import Inpainter as FluxInpainter 
+from .replicate_inpainter import ReplicateFluxInpainter, ReplicateSDXLInpainter
+
+# load api keys of replicate
+load_dotenv()
+
+class Inpainter:
+    def __init__(self, inpainter_type: Literal["flux_local", "flux_replicate", "sdxl_replicate"] = "flux_local", gradio_url: str = ""):
+        if inpainter_type == "flux_local":
+            self.inpainter = FluxInpainter(gradio_url)
+        elif inpainter_type == "flux_replicate":
+            self.inpainter = ReplicateFluxInpainter()
+        elif inpainter_type == "sdxl_replicate":
+            self.inpainter = ReplicateSDXLInpainter()
+        else:
+            raise ValueError(f"Invalid inpainter_type: {inpainter_type}")
+
+    def __call__(self,  image:Image.Image, mask:Image.Image, seed:int, prompt:str):
+        return self.inpainter(image, mask, seed, prompt)
diff --git a/FLUX_inpainting_server/.gitattributes b/inpainting/flux_inpainter_server/.gitattributes
similarity index 100%
rename from FLUX_inpainting_server/.gitattributes
rename to inpainting/flux_inpainter_server/.gitattributes
diff --git a/FLUX_inpainting_server/README.md b/inpainting/flux_inpainter_server/README.md
similarity index 100%
rename from FLUX_inpainting_server/README.md
rename to inpainting/flux_inpainter_server/README.md
diff --git a/FLUX_inpainting_server/app.py b/inpainting/flux_inpainter_server/app.py
similarity index 100%
rename from FLUX_inpainting_server/app.py
rename to inpainting/flux_inpainter_server/app.py
diff --git a/FLUX_inpainting_server/controlnet_flux.py b/inpainting/flux_inpainter_server/controlnet_flux.py
similarity index 100%
rename from FLUX_inpainting_server/controlnet_flux.py
rename to inpainting/flux_inpainter_server/controlnet_flux.py
diff --git a/FLUX_inpainting_server/inpaint.py b/inpainting/flux_inpainter_server/inpaint.py
similarity index 100%
rename from FLUX_inpainting_server/inpaint.py
rename to inpainting/flux_inpainter_server/inpaint.py
diff --git a/FLUX_inpainting_server/main.py b/inpainting/flux_inpainter_server/main.py
similarity index 69%
rename from FLUX_inpainting_server/main.py
rename to inpainting/flux_inpainter_server/main.py
index 7419d95..280356c 100644
--- a/FLUX_inpainting_server/main.py
+++ b/inpainting/flux_inpainter_server/main.py
@@ -10,6 +10,7 @@
 image_path='https://huggingface.co/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha/resolve/main/images/bucket.png',
 mask_path='https://huggingface.co/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha/resolve/main/images/bucket_mask.jpeg',
 prompt='a person wearing a white shoe, carrying a white bucket with text "FLUX" on it'
+prompt_detailed = 'an ivy-covered red brick building with classical columns and arched windows, on top of a base, east coast university, ivy-clad red brick buildings, cobblestone paths, gentle autumn light, soft warm lighting, realistic textures, subtle gradients, isometric perspective, academic charm, and meticulous detailing'
 
 # Build pipeline
 controlnet = FluxControlNetModel.from_pretrained("alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha", torch_dtype=torch.bfloat16)
@@ -48,3 +49,20 @@
 
 result.save('flux_inpaint.png')
 print("Successfully inpaint image")
+
+result = pipe(
+    prompt=prompt_detailed,
+    height=size[1],
+    width=size[0],
+    control_image=image,
+    control_mask=mask,
+    num_inference_steps=28,
+    generator=generator,
+    controlnet_conditioning_scale=0.9,
+    guidance_scale=3.5,
+    negative_prompt="",
+    true_guidance_scale=3.5
+).images[0]
+
+result.save('flux_inpaint_detailed.png')
+print("Successfully inpaint image")
diff --git a/FLUX_inpainting_server/pipeline_flux_controlnet_inpaint.py b/inpainting/flux_inpainter_server/pipeline_flux_controlnet_inpaint.py
similarity index 100%
rename from FLUX_inpainting_server/pipeline_flux_controlnet_inpaint.py
rename to inpainting/flux_inpainter_server/pipeline_flux_controlnet_inpaint.py
diff --git a/FLUX_inpainting_server/readme.md b/inpainting/flux_inpainter_server/readme.md
similarity index 100%
rename from FLUX_inpainting_server/readme.md
rename to inpainting/flux_inpainter_server/readme.md
diff --git a/FLUX_inpainting_server/requirements.txt b/inpainting/flux_inpainter_server/requirements.txt
similarity index 100%
rename from FLUX_inpainting_server/requirements.txt
rename to inpainting/flux_inpainter_server/requirements.txt
diff --git a/FLUX_inpainting_server/transformer_flux.py b/inpainting/flux_inpainter_server/transformer_flux.py
similarity index 100%
rename from FLUX_inpainting_server/transformer_flux.py
rename to inpainting/flux_inpainter_server/transformer_flux.py
diff --git a/inpainting/replicate_inpainter/__init__.py b/inpainting/replicate_inpainter/__init__.py
new file mode 100644
index 0000000..5a112c7
--- /dev/null
+++ b/inpainting/replicate_inpainter/__init__.py
@@ -0,0 +1,4 @@
+from .sdxl import ReplicateSDXLInpainter
+from .flux import ReplicateFluxInpainter
+
+__all__ = ["ReplicateSDXLInpainter", "ReplicateFluxInpainter"]
diff --git a/inpainting/replicate_inpainter/base.py b/inpainting/replicate_inpainter/base.py
new file mode 100644
index 0000000..d7951a9
--- /dev/null
+++ b/inpainting/replicate_inpainter/base.py
@@ -0,0 +1,52 @@
+import os, os.path as osp 
+import replicate
+import numpy as np
+from abc import ABC, abstractmethod
+from PIL import Image
+
+TMP_DIR = "./tmp_flux"
+TMP_PATH = osp.join(TMP_DIR, "result.png")
+
+class BaseReplicateInpainter(ABC):
+    REPLICATE_ID = ""
+    def __init__(self) -> None:
+        pass
+
+    @abstractmethod
+    def _build_extra_inputs(self):
+        pass
+
+    def run(self, image: Image, mask: Image, seed: int, prompt: str):
+        inputs = {
+            "image": image,
+            "mask": mask,
+            "seed": seed,
+            "prompt": prompt,
+        }
+        inputs.update(self._build_extra_inputs())
+        print(inputs.keys())
+        return replicate.run(
+            self.REPLICATE_ID,
+            input=inputs
+        )
+    
+    def __call__(self, image: Image, mask: Image, seed: int, prompt: str):
+        os.makedirs(TMP_DIR, exist_ok=True)
+        image = image.convert("RGB")
+        mask_rgb = Image.new('RGB', mask.size)
+        mask_rgb.paste(mask)
+        image_tmp_path = osp.join(TMP_DIR, "image.png")
+        mask_tmp_path = osp.join(TMP_DIR, "mask.png")
+        image.save(image_tmp_path)
+        mask_rgb.save(mask_tmp_path)
+
+        print(np.array(mask_rgb).shape, np.array(image).shape)
+        # exit(0)
+
+        output = self.run(open(image_tmp_path, "rb"), open(mask_tmp_path, "rb"), seed, prompt)
+        # output = self.run(image, mask, seed, prompt)
+        assert len(output) == 1
+
+        with open(TMP_PATH, "wb") as file:
+            file.write(output[0].read())
+        return Image.open(TMP_PATH)
\ No newline at end of file
diff --git a/inpainting/replicate_inpainter/flux.py b/inpainting/replicate_inpainter/flux.py
new file mode 100644
index 0000000..a1503d5
--- /dev/null
+++ b/inpainting/replicate_inpainter/flux.py
@@ -0,0 +1,21 @@
+from .base import BaseReplicateInpainter
+
+
+class ReplicateFluxInpainter(BaseReplicateInpainter):
+    """
+    Replicate playground: https://replicate.com/black-forest-labs/flux-fill-dev
+    Speed and Cost: 0.04$ / image, 9.6s / image
+    """
+
+    REPLICATE_ID = "fishwowater/flux-dev-controlnet-inpainting-beta:27d3ff35f58b4409775de5a0b36e99b4c6d2d7fc7fe772b35170951db678ec63"
+
+    def _build_extra_inputs(self):
+        return {
+            # default guidance scale
+            "guidance_scale": 3.5,
+            "true_guidance_scale": 3.5,
+            "controlnet_conditioning_scale": 0.9,
+            # default values for inference steps
+            "num_inference_steps": 24,
+            "output_quality": 100,
+        }
diff --git a/inpainting/replicate_inpainter/sdxl.py b/inpainting/replicate_inpainter/sdxl.py
new file mode 100644
index 0000000..2b117dc
--- /dev/null
+++ b/inpainting/replicate_inpainter/sdxl.py
@@ -0,0 +1,19 @@
+from .base import BaseReplicateInpainter
+
+
+
+class ReplicateSDXLInpainter(BaseReplicateInpainter):
+    """
+    Replicate playground: https://replicate.com/lucataco/sdxl-inpainting
+    Speed and Cost: 0.0023$ / image, 1.9s / image
+    """
+
+    REPLICATE_ID = "lucataco/sdxl-inpainting:a5b13068cc81a89a4fbeefeccc774869fcb34df4dbc92c1555e0f2771d49dde7"
+
+    def _build_extra_inputs(self):
+        return {
+            # default values on the playground
+            "guidance_scale": 8.0,
+            "steps": 20,
+            "strength": 0.7,
+        }
\ No newline at end of file
diff --git a/inpainting_server.sh b/inpainting_server.sh
index 0be5d24..ec76e7c 100755
--- a/inpainting_server.sh
+++ b/inpainting_server.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # Installation as shown here: https://huggingface.co/spaces/ameerazam08/FLUX.1-dev-Inpainting-Model-Beta-GPU?docker=true
-cd FLUX_inpainting_server
+cd inpainting_server/flux_inpainter
 python -m venv env
 source env/bin/activate
 
diff --git a/run_pipeline.py b/run_pipeline.py
index 63867b8..47bcc0a 100644
--- a/run_pipeline.py
+++ b/run_pipeline.py
@@ -24,7 +24,7 @@
 from lpips import LPIPS, im2tensor
 from PIL import Image
 
-from FLUX_inpainting_server.inpaint import Inpainter
+from inpainting import Inpainter
 
 # a brief explanation of the orthographic scale:
 # the ortho scale determines the size of the world in the image
@@ -301,7 +301,7 @@ def inpaint_tile(
         base_ortho_scale: float = 1.75,
 ):  
     if isinstance(server, str):
-        inpainter = Inpainter(server)
+        inpainter = Inpainter("flux_local", server)
     else:
         inpainter = server
     
@@ -510,7 +510,7 @@ def rebased_inpainted_tile(inpainted_image_path, base_slab_path, is_left_tile: b
 
     return merged
 
-def worker(prefix, tile_dict, gradio_url, blender_path, gpu_queue, generated_grid, first_tile_path, tile_mq, task_id, config, init_seed=429, verbose=True):
+def worker(prefix, tile_dict, gradio_url, inpainter_type, blender_path, gpu_queue, generated_grid, first_tile_path, tile_mq, task_id, config, init_seed=429, verbose=True):
     if not verbose:
         sys.stdout = open("/dev/null", 'w')
         sys.stderr = open("/dev/null", 'w')
@@ -575,7 +575,7 @@ def load_pipeline_in_thread(event, queue):
 
         seed = init_seed + task_id + attempts
 
-        inpainting_server = Inpainter(gradio_url)
+        inpainting_server = Inpainter(inpainter_type, gradio_url)
 
         tile_mq.put({"pos": pos, "state": States.INPAINTING, "task_id": task_id})
 
@@ -688,6 +688,7 @@ def main(
         workers_per_gpu: int = 1,
         seed: int = 1429,
         gradio_url: str = 'http://127.0.0.1:7860',
+        inpainter_type: Literal["flux_local", "flux_replicate", "sdxl_replicate"] = "flux_local",
         blender_path: str = 'blender-3.6.19-linux-x64/blender',
         resample: Tuple[int, int] = None,
         resample_prompt: str = None,
@@ -802,12 +803,12 @@ def queue_available_jobs():
             task_id = len(results)
 
             if parallel:
-                res = pool.apply_async(worker, (prefix, tile, gradio_url, blender_path, gpu_queue, generated_grid, first_tile_path, tile_mq, task_id, config, seed), error_callback=partial(announce_crash, tile, task_id))
+                res = pool.apply_async(worker, (prefix, tile, gradio_url, inpainter_type, blender_path, gpu_queue, generated_grid, first_tile_path, tile_mq, task_id, config, seed), error_callback=partial(announce_crash, tile, task_id))
             else:
                 # peek at the gpu queue
                 gpu_id = gpu_queue.get()
                 gpu_queue.put(gpu_id)
-                res = worker(prefix, tile, gradio_url, blender_path, gpu_queue, generated_grid, first_tile_path, tile_mq, task_id, config, seed, verbose=True)
+                res = worker(prefix, tile, gradio_url, inpainter_type, blender_path, gpu_queue, generated_grid, first_tile_path, tile_mq, task_id, config, seed, verbose=True)
                 gpu_queue.put(gpu_id)
 
             results.append(res)
@@ -862,4 +863,4 @@ def queue_available_jobs():
 
 if __name__ == '__main__':
     import fire
-    fire.Fire(main)
+    fire.Fire(main)
\ No newline at end of file