From 80523e79c8d4756dddad921e23bd4ec5d7aeee2a Mon Sep 17 00:00:00 2001
From: wangyukai
Date: Mon, 8 Sep 2025 08:24:09 +0000
Subject: [PATCH 1/4] delete the unused load_from_pretrain flag.

---
 internnav/configs/trainer/il.py | 1 -
 internnav/model/__init__.py | 138 +-----------------
 .../configs/challenge_train_kujiale_cfg.py | 5 +-
 .../train/configs/challenge_train_mp3d_cfg.py | 3 +-
 scripts/train/configs/cma.py | 7 +-
 scripts/train/configs/cma_plus.py | 7 +-
 scripts/train/configs/navdp.py | 10 +-
 scripts/train/configs/rdp.py | 3 +-
 scripts/train/configs/seq2seq.py | 3 +-
 scripts/train/configs/seq2seq_plus.py | 3 +-
 10 files changed, 22 insertions(+), 158 deletions(-)

diff --git a/internnav/configs/trainer/il.py b/internnav/configs/trainer/il.py
index 9fc4bc43..d35d7692 100644
--- a/internnav/configs/trainer/il.py
+++ b/internnav/configs/trainer/il.py
@@ -26,7 +26,6 @@ class IlCfg(BaseModel, extra='allow'):
     save_filter_frozen_weights: Optional[bool] = None
     load_from_ckpt: Optional[bool]
     ckpt_to_load: Optional[str]
-    load_from_pretrain: Optional[bool]
     dataset_r2r_root_dir: Optional[str] = None
     dataset_3dgs_root_dir: Optional[str] = None
     dataset_grutopia10_root_dir: Optional[str] = None
diff --git a/internnav/model/__init__.py b/internnav/model/__init__.py
index 02d8c4b5..9ac47edc 100644
--- a/internnav/model/__init__.py
+++ b/internnav/model/__init__.py
@@ -8,18 +8,17 @@
 from .basemodel.cma.cma_clip_policy import CMA_CLIP_Net, CMACLIPModelConfig
 from .basemodel.cma.cma_policy import CMAModelConfig, CMANet
-from .basemodel.rdp.rdp_policy import RDPNet, RDPModelConfig
+from .basemodel.internvla_n1.internvla_n1_policy import (
+    InternVLAN1ModelConfig,
+    InternVLAN1Net,
+)
 from .basemodel.navdp.navdp_policy import NavDPModelConfig, NavDPNet
-
-from .basemodel.seq2seq.seq2seq_policy import Seq2SeqNet, Seq2SeqModelConfig
-
-from .basemodel.internvla_n1.internvla_n1_policy import InternVLAN1Net, InternVLAN1ModelConfig
-
+from .basemodel.rdp.rdp_policy import RDPModelConfig, RDPNet
+from .basemodel.seq2seq.seq2seq_policy import Seq2SeqModelConfig, Seq2SeqNet
 from .utils.misc import set_cuda, set_random_seed, wrap_model
 from .utils.save import load_checkpoint
-
 def get_policy(policy_name):
     if policy_name == 'CMA_CLIP_Policy':
         return CMA_CLIP_Net
@@ -52,128 +51,3 @@ def get_config(policy_name):
         return NavDPModelConfig
     else:
         raise ValueError(f'Policy {policy_name} not found')
-
-
-def initialize_policy(
-    config,
-    device=None,
-    action_stats=None,
-) -> None:
-    from internnav.utils.common_log_util import common_logger as logger  # lazy import
-
-    load_from_ckpt = config.il.load_from_ckpt
-    load_from_pretrain = config.il.load_from_pretrain
-
-    default_gpu, n_gpu, device = set_cuda(config, device)
-    if default_gpu:
-        logger.info(
-            'device: {} n_gpu: {}, distributed training: {}'.format(device, n_gpu, bool(config.local_rank != -1))
-        )
-
-    seed = config.seed
-    if config.ddp.use:
-        seed += config.local_rank
-    set_random_seed(seed)
-
-    # if default_gpu:
-    #     save_training_meta(config)
-
-    observation_space = spaces.Box(
-        low=0.0,
-        high=1.0,
-        shape=(256, 256, 1),
-        dtype=np.float32,
-    )
-
-    policy = get_policy(config.model.policy_name)
-
-    self_policy = policy(
-        config=config.model,
-        observation_space=observation_space,
-    )
-
-    if load_from_pretrain:
-        new_ckpt_weights = {}
-        model_config = config.model
-        self_policy.load_state_dict(new_ckpt_weights, strict=False)
-
-    start_epoch = 0
-    if load_from_ckpt:
-        ckpt_path = config.il.ckpt_to_load
-        ckpt_dict = load_checkpoint(ckpt_path, map_location='cpu')
if 'state_dict' in ckpt_dict: - state_dict = ckpt_dict['state_dict'] - else: - state_dict = ckpt_dict - if 'epoch' in ckpt_dict: - start_epoch = ckpt_dict['epoch'] - new_state_dict = {} - # Iterate through the state dictionary items - for k, v in state_dict.items(): - new_key = k.replace('module.', '') - if config.model.policy_name != 'RDP_Policy': - new_key = new_key.replace('net.', '') # this is for cma policy - new_state_dict[new_key] = v - del state_dict[k] # Remove the old key with 'module.' - - incompatible_keys, _ = self_policy.load_state_dict(new_state_dict, strict=False) - if len(incompatible_keys) > 0: - logger.warning(f'Incompatible keys: {incompatible_keys}') - logger.info(f'Loaded weights from checkpoint: {ckpt_path}') - - params = sum(param.numel() for param in self_policy.parameters()) - params_t = sum(p.numel() for p in self_policy.parameters() if p.requires_grad) - logger.info(f'Agent parameters: {params / 1e6:.2f}M. Trainable: {params_t / 1e6:.2f}M') - logger.info('Finished setting up policy.') - - if len(config.torch_gpu_ids) == 1: - config.ddp.use = False - if config.ddp.use: - if config.ddp.use_dp: - # Data parallel - self_policy = wrap_model( - self_policy, - config.torch_gpu_ids, - config.local_rank, - logger, - config.world_size, - use_dp=config.ddp.use_dp, - ) - else: - # Distributed data parallel - self_policy = wrap_model( - self_policy, - torch.device(f'cuda:{config.local_rank}'), - config.local_rank, - logger, - config.world_size, - use_dp=config.ddp.use_dp, - ) - else: - self_policy.to(device) - - optimizer = torch.optim.Adam(self_policy.parameters(), lr=float(config.il.lr)) - - lr_scheduler = None - if config.il.lr_schedule.use: - if config.il.lr_schedule.type == 'cosine': - lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer, - T_max=config.il.epochs, - eta_min=float(config.il.lr_schedule.min_lr), - ) - elif config.il.lr_schedule.type == 'step': - lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( - optimizer, - milestones=config.il.lr_schedule.decay_epochs, - gamma=float(config.il.lr_schedule.decay_factor), - ) - elif config.il.lr_schedule.type == 'linear': - lr_scheduler = torch.optim.lr_scheduler.LinearLR( - optimizer, - start_factor=config.il.lr_schedule.warmup_factor, - end_factor=1.0, - total_iters=config.il.lr_schedule.warmup_epochs, - ) - - return self_policy, optimizer, lr_scheduler, start_epoch diff --git a/scripts/train/configs/challenge_train_kujiale_cfg.py b/scripts/train/configs/challenge_train_kujiale_cfg.py index 0b8c5d80..1c1eea78 100644 --- a/scripts/train/configs/challenge_train_kujiale_cfg.py +++ b/scripts/train/configs/challenge_train_kujiale_cfg.py @@ -39,8 +39,7 @@ warmup_ratio=0.1, save_filter_frozen_weights=True, load_from_ckpt=False, - ckpt_to_load='checkpoints/r2r/fine_tuned/rdp/checkpoint-104150', - load_from_pretrain=True, + ckpt_to_load='', dataset_r2r_root_dir='interiornav_data/raw_data/', dataset_3dgs_root_dir='', dataset_grutopia10_root_dir='', @@ -48,7 +47,7 @@ lerobot_features_dir='data/vln_pe/traj_data/interior', camera_name='pano_camera_0', report_to='wandb', # wandb, tensorboard, none - ddp_find_unused_parameters = True, + ddp_find_unused_parameters=True, filter_failure=FilterFailure( use=True, min_rgb_nums=15, diff --git a/scripts/train/configs/challenge_train_mp3d_cfg.py b/scripts/train/configs/challenge_train_mp3d_cfg.py index 694c599a..d9cc90e2 100644 --- a/scripts/train/configs/challenge_train_mp3d_cfg.py +++ b/scripts/train/configs/challenge_train_mp3d_cfg.py @@ -40,7 +40,6 @@ 
save_filter_frozen_weights=True, load_from_ckpt=False, ckpt_to_load='', - load_from_pretrain=True, dataset_r2r_root_dir='data/vln_pe/raw_data/r2r', dataset_3dgs_root_dir='', dataset_grutopia10_root_dir='', @@ -48,7 +47,7 @@ lerobot_features_dir='data/vln_pe/traj_data/r2r', camera_name='pano_camera_0', report_to='wandb', # wandb, tensorboard, none - ddp_find_unused_parameters = True, + ddp_find_unused_parameters=True, filter_failure=FilterFailure( use=True, min_rgb_nums=15, diff --git a/scripts/train/configs/cma.py b/scripts/train/configs/cma.py index 28279494..ef68685a 100644 --- a/scripts/train/configs/cma.py +++ b/scripts/train/configs/cma.py @@ -30,14 +30,13 @@ batch_size=2, lr=1e-4, num_workers=8, - weight_decay=1e-5, - warmup_ratio=0.05, + weight_decay=1e-5, + warmup_ratio=0.05, use_iw=True, inflection_weight_coef=3.2, save_filter_frozen_weights=False, load_from_ckpt=False, ckpt_to_load='', - load_from_pretrain=False, lmdb_map_size=1e12, dataset_r2r_root_dir='data/vln_pe/raw_data/r2r', dataset_3dgs_root_dir='', @@ -46,7 +45,7 @@ lerobot_features_dir='data/vln_pe/traj_data/r2r', camera_name='pano_camera_0', report_to='wandb', # wandb, tensorboard, none - ddp_find_unused_parameters = True, + ddp_find_unused_parameters=True, filter_failure=FilterFailure( use=True, min_rgb_nums=15, diff --git a/scripts/train/configs/cma_plus.py b/scripts/train/configs/cma_plus.py index e083663c..d4d48782 100644 --- a/scripts/train/configs/cma_plus.py +++ b/scripts/train/configs/cma_plus.py @@ -30,14 +30,13 @@ batch_size=2, lr=1e-4, num_workers=8, - weight_decay=1e-5, - warmup_ratio=0.05, + weight_decay=1e-5, + warmup_ratio=0.05, use_iw=True, inflection_weight_coef=3.2, save_filter_frozen_weights=False, load_from_ckpt=False, ckpt_to_load='checkpoints/r2r/zero_shot/cma', - load_from_pretrain=False, lmdb_map_size=1e12, dataset_r2r_root_dir='data/vln_pe/raw_data/r2r', dataset_3dgs_root_dir='', @@ -46,7 +45,7 @@ lerobot_features_dir='data/vln_pe/traj_data/r2r', camera_name='pano_camera_0', report_to='wandb', # wandb, tensorboard, none - ddp_find_unused_parameters = True, + ddp_find_unused_parameters=True, filter_failure=FilterFailure( use=True, min_rgb_nums=15, diff --git a/scripts/train/configs/navdp.py b/scripts/train/configs/navdp.py index cd8f9433..4085f8a8 100644 --- a/scripts/train/configs/navdp.py +++ b/scripts/train/configs/navdp.py @@ -2,7 +2,6 @@ from internnav.configs.trainer.eval import EvalCfg from internnav.configs.trainer.exp import ExpCfg from internnav.configs.trainer.il import FilterFailure, IlCfg, Loss -import os navdp_exp_cfg = ExpCfg( name='navdp_train', @@ -14,7 +13,7 @@ tensorboard_dir='checkpoints/%s/tensorboard', checkpoint_folder='checkpoints/%s/ckpts', log_dir='checkpoints/%s/logs', - local_rank= 0, + local_rank=0, # device = None, seed=0, eval=EvalCfg( @@ -41,7 +40,6 @@ save_filter_frozen_weights=False, load_from_ckpt=False, ckpt_to_load='', - load_from_pretrain=False, lmdb_map_size=1e12, dataset_r2r_root_dir='data/vln_pe/raw_data/r2r', dataset_3dgs_root_dir='', @@ -50,8 +48,8 @@ lerobot_features_dir='data/vln_pe/traj_data/r2r', camera_name='pano_camera_0', report_to='tensorboard', # wandb, tensorboard, none - dataset_navdp = 'data/datasets/navdp_dataset_lerobot.json', - root_dir = 'data/datasets/InternData-N1/vln_n1/traj_data', + dataset_navdp='data/datasets/navdp_dataset_lerobot.json', + root_dir='data/datasets/InternData-N1/vln_n1/traj_data', image_size=224, scene_scale=1.0, preload=False, @@ -66,7 +64,7 @@ dropout=0.1, scratch=False, finetune=False, - ddp_find_unused_parameters 
= True, + ddp_find_unused_parameters=True, filter_failure=FilterFailure( use=True, min_rgb_nums=15, diff --git a/scripts/train/configs/rdp.py b/scripts/train/configs/rdp.py index 694c599a..d9cc90e2 100644 --- a/scripts/train/configs/rdp.py +++ b/scripts/train/configs/rdp.py @@ -40,7 +40,6 @@ save_filter_frozen_weights=True, load_from_ckpt=False, ckpt_to_load='', - load_from_pretrain=True, dataset_r2r_root_dir='data/vln_pe/raw_data/r2r', dataset_3dgs_root_dir='', dataset_grutopia10_root_dir='', @@ -48,7 +47,7 @@ lerobot_features_dir='data/vln_pe/traj_data/r2r', camera_name='pano_camera_0', report_to='wandb', # wandb, tensorboard, none - ddp_find_unused_parameters = True, + ddp_find_unused_parameters=True, filter_failure=FilterFailure( use=True, min_rgb_nums=15, diff --git a/scripts/train/configs/seq2seq.py b/scripts/train/configs/seq2seq.py index a6a8532e..fa54b142 100644 --- a/scripts/train/configs/seq2seq.py +++ b/scripts/train/configs/seq2seq.py @@ -37,7 +37,6 @@ save_filter_frozen_weights=False, load_from_ckpt=False, ckpt_to_load='', - load_from_pretrain=True, lmdb_map_size=1e12, dataset_r2r_root_dir='data/vln_pe/raw_data/r2r', dataset_3dgs_root_dir='', @@ -46,7 +45,7 @@ lerobot_features_dir='data/vln_pe/traj_data/r2r', camera_name='pano_camera_0', report_to='wandb', # wandb, tensorboard, none - ddp_find_unused_parameters = True, + ddp_find_unused_parameters=True, filter_failure=FilterFailure( use=True, min_rgb_nums=15, diff --git a/scripts/train/configs/seq2seq_plus.py b/scripts/train/configs/seq2seq_plus.py index fb3f3cfb..d159ea46 100644 --- a/scripts/train/configs/seq2seq_plus.py +++ b/scripts/train/configs/seq2seq_plus.py @@ -37,7 +37,6 @@ save_filter_frozen_weights=False, load_from_ckpt=False, ckpt_to_load='checkpoints/r2r/zero_shot/seq2seq', - load_from_pretrain=True, lmdb_map_size=1e12, dataset_r2r_root_dir='data/vln_pe/raw_data/r2r', dataset_3dgs_root_dir='', @@ -46,7 +45,7 @@ lerobot_features_dir='data/vln_pe/traj_data/r2r', camera_name='pano_camera_0', report_to='wandb', # wandb, tensorboard, none - ddp_find_unused_parameters = True, + ddp_find_unused_parameters=True, filter_failure=FilterFailure( use=True, min_rgb_nums=15, From 7f19848766af70e2423d3ce986f598c13a2dbd4b Mon Sep 17 00:00:00 2001 From: wangyukai Date: Tue, 9 Sep 2025 05:50:11 +0000 Subject: [PATCH 2/4] update new image v1.2 --- .pre-commit-config.yaml | 1 + challenge/README.md | 56 ++++++++++++++++++++--------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e055621..9f39abd3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,6 +29,7 @@ repos: rev: v2.2.1 hooks: - id: codespell + args: ['--ignore-words-list=ro'] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.1.0 hooks: diff --git a/challenge/README.md b/challenge/README.md index bc8b02d6..501a08bc 100644 --- a/challenge/README.md +++ b/challenge/README.md @@ -4,7 +4,7 @@ This track challenges participants to develop **multimodal navigation agents** that can interpret **natural language instructions** and operate within a **realistic physics-based simulation** environment. -Participants will deploy their agents on a **legged humanoid robot** (e.g., **Unitree H1**) to perform complex indoor navigation tasks using **egocentric visual inputs** and **language commands**. Agents must not only understand instructions but also perceive the environment, model trajectory history, and predict navigation actions in real time. 
+Participants will deploy their agents on a **legged humanoid robot** (e.g., **Unitree H1**) to perform complex indoor navigation tasks using **egocentric visual inputs** and **language commands**. Agents must not only understand instructions but also perceive the environment, model trajectory history, and predict navigation actions in real time. The system should be capable of handling challenges such as camera shake, height variation, and local obstacle avoidance, ultimately achieving robust and safe vision-and-language navigation. @@ -28,13 +28,13 @@ This guide provides a step-by-step walkthrough for participating in the **IROS 2 ## πŸ”— Useful Links -- πŸ” **Challenge Overview:** +- πŸ” **Challenge Overview:** [Challenge of Multimodal Robot Learning in InternUtopia and Real World](https://internrobotics.shlab.org.cn/challenge/2025/). -- πŸ“– **InternUtopia + InternNav Documentation:** +- πŸ“– **InternUtopia + InternNav Documentation:** [Getting Started](https://internrobotics.github.io/user_guide/internutopia/get_started/index.html) -- πŸš€ **Interactive Demo:** +- πŸš€ **Interactive Demo:** [InternNav Model Inference Demo](https://huggingface.co/spaces/InternRobotics/InternNav-Eval-Demo) @@ -43,12 +43,12 @@ This guide provides a step-by-step walkthrough for participating in the **IROS 2 ### Clone the InternNav repository to any desired location ```bash -$ git clone git@github.com:InternRobotics/InternNav.git +$ git clone git@github.com:InternRobotics/InternNav.git --recursive ``` ### Pull our base Docker image ```bash -$ docker pull crpi-mdum1jboc8276vb5.cn-beijing.personal.cr.aliyuncs.com/iros-challenge/internnav:v1.0 +$ docker pull crpi-mdum1jboc8276vb5.cn-beijing.personal.cr.aliyuncs.com/iros-challenge/internnav:v1.2 ``` ### Run the container @@ -96,8 +96,8 @@ $ git clone https://huggingface.co/datasets/spatialverse/InteriorAgent_Nav inter ``` Please refer to [document](https://internrobotics.github.io/user_guide/internnav/quick_start/installation.html#interndata-n1-dataset-preparation) for a full guide on InternData-N1 Dataset Preparation. In this challenge, we used test on the VLN-PE part of the [InternData-N1](https://huggingface.co/datasets/InternRobotics/InternData-N1) dataset. Optional: please feel free to download the full dataset to train your model. 
- Download the [**IROS-2025-Challenge-Nav Dataset**](https://huggingface.co/datasets/InternRobotics/IROS-2025-Challenge-Nav/tree/main) for the `vln_pe/`,
- Download the [SceneData-N1](https://huggingface.co/datasets/InternRobotics/Scene-N1/tree/main) for the `scene_data/`,
- Download the [Embodiments](https://huggingface.co/datasets/InternRobotics/Embodiments) for the `Embodiments/`

```bash
$ git clone https://huggingface.co/datasets/InternRobotics/Embodiments data/Embodiments
```

### Suggested Dataset Directory Structure
#### InternData-N1
```
data/
β”œβ”€β”€ Embodiments/
β”œβ”€β”€ scene_data/
β”‚   └── mp3d_pe/
└── vln_pe/
    β”œβ”€β”€ raw_data/    # navigation episodes
    β”‚   β”œβ”€β”€ train/
    β”‚   β”œβ”€β”€ val_seen/
    β”‚   └── val_unseen/
    └── traj_data/   # training sample data for two types of scenes
        β”œβ”€β”€ interiornav/
        β”‚   └── kujiale_xxxx.tar.gz
        └── r2r/
            └── trajectory_0/
                β”œβ”€β”€ data/
                β”œβ”€β”€ meta/
                └── videos/
```
#### Interior_data/
```bash
interiornav_data
β”œβ”€β”€ scene_data
β”‚   β”œβ”€β”€ kujiale_xxxx/
β”‚   └── ...
└── raw_data
    β”œβ”€β”€ train/
    β”œβ”€β”€ val_seen/
    └── val_unseen/
```

$ git submodule update --init
```

## πŸ› οΈ Model Training and Testing

Please refer to the [documentation](https://internrobotics.github.io/user_guide/internnav/quick_start/train_eval.html) for a quick-start guide to training or evaluating supported models in InternNav.

For advanced usage, including customizing datasets, models, and experimental settings, see the [tutorial](https://internrobotics.github.io/user_guide/internnav/tutorials/index.html).

The main components include:
- The evaluation process can now be viewed at `logs/`. Update `challenge_cfg.py` to get visualization output:
  - Set `eval_settings['vis_output']=True` to see saved frames and video during the evaluation trajectory
  - Set `env_settings['headless']=False` to open the isaac-sim interactive window

### Create Your Model & Agent
#### Custom Model

action = self.agent.step(obs)
obs = [{
    'globalgps': [X, Y, Z],            # robot location
    'globalrotation': [X, Y, Z, W],    # robot orientation in quaternion
    'rgb': np.array(256, 256, 3),      # rgb camera image
    'depth': np.array(256, 256, 1),    # depth image
}]
```

action = List[int]  # action for each environment
```

#### Create a Custom Config Class
In the model file, define a `Config` class that inherits from `PretrainedConfig`.
A reference implementation is `CMAModelConfig` in [`cma_policy.py`](../internnav/model/cma/cma_policy.py).

#### Registration and Integration

In [`internnav/model/__init__.py`](../internnav/model/__init__.py), register your model class in `get_policy` and its config class in `get_config`, as sketched below.
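As a rough sketch of those two steps (all names here, such as `MyModelConfig`, `MyNet`, and `'My_Policy'`, are hypothetical placeholders rather than code that exists in the repository):

```python
# Hypothetical sketch; MyModelConfig / MyNet / 'My_Policy' are placeholder names.
from transformers import PretrainedConfig


class MyModelConfig(PretrainedConfig):
    model_type = 'my_policy'

    def __init__(self, hidden_size=512, **kwargs):
        # Store model hyperparameters before delegating to PretrainedConfig.
        self.hidden_size = hidden_size
        super().__init__(**kwargs)


# Registration then amounts to adding branches to the two lookup functions in
# internnav/model/__init__.py:
#   in get_policy():  elif policy_name == 'My_Policy': return MyNet
#   in get_config():  elif policy_name == 'My_Policy': return MyModelConfig
```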
#### Create a Custom Agent

The Agent handles interaction with the environment, data preprocessing/postprocessing, and calls the Model for inference.
A custom Agent usually inherits from [`Agent`](../internnav/agent/base.py) and implements the following key methods:

- `reset()`: Resets the Agent's internal state (e.g., RNN states, action history). Called at the start of each episode.

Example: [`CMAAgent`](../internnav/agent/cma_agent.py)

#### Create a Trainer

The Trainer manages the training loop, including data loading, forward pass, loss calculation, and backpropagation.
A custom trainer usually inherits from the [`Base Trainer`](../internnav/trainer/base.py) and implements:

- `train_epoch()`: Runs one training epoch (batch iteration, forward pass, loss calculation, parameter update).

Main fields:
- `model_name`: Must match the name used during training
- `ckpt_to_load`: Path to the model checkpoint
- `task`: Defines the task settings (number of envs, scene, robots)
- `dataset`: Loads the r2r or interiornav dataset
- `split`: Dataset split (`val_seen`, `val_unseen`, `test`, etc.)

## πŸ“¦ Packaging and Submission

Use this to evaluate your model on the validation split locally. The command is identical to what EvalAI runs, so it’s also a good sanity check before submitting.

- Make sure your trained weights and model code are correctly packaged in your submitted Docker image at `/root/InternNav`.
- Make sure the evaluation configuration is properly set at `scripts/eval/configs/challenge_cfg.py`.
- No need to include the `data` directory in your submission.
```bash
# Run local benchmark on the validation set
$ bash challenge/start_eval_iros.sh --config scripts/eval/configs/challenge_cfg.py --split [val_seen/val_unseen]
```

$ cd PATH/TO/INTERNNAV/

# Build the new image
$ docker build -t my-internnav-custom:v1 .
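
# Optional sanity check (a suggestion, not part of the official flow): the
# Updates section below requires `screen` inside the submitted image. Assuming
# the image tag built above, you could verify it with:
#   docker run --rm my-internnav-custom:v1 screen --version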
```

Or commit your container as a new image:

```bash
$ docker commit internnav my-internnav-with-updates:v1
```

For detailed submission guidelines and troubleshooting, refer to the official EvalAI documentation.

### πŸ§ͺ Simulation Environment

- **Platform**: Physics-driven simulation using [InternUtopia](https://github.com/InternRobotics/InternUtopia)
- **Robot**: Unitree H1 humanoid robot model
- **Tasks**: Instruction-based navigation in richly furnished indoor scenes
- **Evaluation**: Based on success rate, path efficiency, and instruction compliance

### πŸ” Evaluation Metrics

- **Success Rate (SR)**: Proportion of episodes where the agent reaches the goal location within 3m
- **SPL**: Success weighted by Path Length
- **Trajectory Length (TL)**: Total length of the trajectory (m)
- **Navigation Error (NE)**: Euclidean distance between the agent's final position and the goal (m)

### 🚨 Challenges to Solve

- βœ… Integrating vision, language, and control into a single inference pipeline
- βœ… Overcoming sensor instability and actuation delay from simulated humanoid locomotion
- βœ… Ensuring real-time, smooth, and goal-directed behavior under physics constraints

This track pushes the boundary of embodied AI by combining **natural language understanding**, **3D vision**, and **realistic robot control**, fostering solutions ready for future real-world deployments.

For more details with in-depth physical analysis results on the VLN task, please

- **Organizer**: Shanghai AI Lab
- **Co-organizers**: ManyCore Tech, University of Adelaide
- **Data Contributions**: Online test data provided by Prof. Qi Wu's team; Kujiale scenes provided by ManyCore Tech
-- **Sponsors** (in no particular order): ByteDance, HUAWEI, ENGINEAI, HONOR, ModelScope, Alibaba Cloud, AGILEX, DOBOT
\ No newline at end of file
+- **Sponsors** (in no particular order): ByteDance, HUAWEI, ENGINEAI, HONOR, ModelScope, Alibaba Cloud, AGILEX, DOBOT

From 05dfc9ad7a1baa162573859f938374a0cf065ce1 Mon Sep 17 00:00:00 2001
From: wangyukai
Date: Tue, 9 Sep 2025 06:21:28 +0000
Subject: [PATCH 3/4] update section

---
 challenge/README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/challenge/README.md b/challenge/README.md
index 501a08bc..42a85fe1 100644
--- a/challenge/README.md
+++ b/challenge/README.md
@@ -9,6 +9,9 @@ Participants will deploy their agents on a **legged humanoid robot** (e.g., **Un
 The system should be capable of handling challenges such as camera shake, height variation, and local obstacle avoidance, ultimately achieving robust and safe vision-and-language navigation.

 ---
+## Updates
+- We have fixed a possible memory leak inside InternUtopia. Please pull the latest image (v1.2).
+- For submission, please make sure the image contains `screen`. Quick check: `$ screen --version`.
## πŸ“‹ Table of Contents
- [πŸ“š Getting Started](#-get-started)

From a4d735d85faae096e20da2403aef00a93ebc3848 Mon Sep 17 00:00:00 2001
From: wangyukai
Date: Tue, 9 Sep 2025 06:24:12 +0000
Subject: [PATCH 4/4] add symbol

---
 challenge/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/challenge/README.md b/challenge/README.md
index 42a85fe1..79337d25 100644
--- a/challenge/README.md
+++ b/challenge/README.md
@@ -9,7 +9,7 @@ Participants will deploy their agents on a **legged humanoid robot** (e.g., **Un
 The system should be capable of handling challenges such as camera shake, height variation, and local obstacle avoidance, ultimately achieving robust and safe vision-and-language navigation.

 ---
-## Updates
+## πŸ†• Updates
 - We have fixed a possible memory leak inside InternUtopia. Please pull the latest image (v1.2).
 - For submission, please make sure the image contains `screen`. Quick check: `$ screen --version`.