diff --git a/README.md b/README.md index 5aa1c37a..40919191 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,9 @@ The toolbox supports the most comprehensive 6 datasets \& benchmarks and 10+ pop The toolbox supports the most advanced high-quality navigation dataset, InternData-N1, which includes 3k+ scenes and 830k VLN data covering diverse embodiments and scenes, and the first dual-system navigation foundation model with leading performance on all the benchmarks and zero-shot generalization capability in the real world, InternVLA-N1. ## 🔥 -- [2025/10] Add a simple [inference-only demo](scripts/eval/inference_only_demo.ipynb) of InternVLA-N1. +- [2025/10] Add a simple [inference-only demo](scripts/notebooks/inference_only_demo.ipynb) of InternVLA-N1. - [2025/10] InternVLA-N1 [technique report](https://internrobotics.github.io/internvla-n1.github.io/static/pdfs/InternVLA_N1.pdf) is released. Please check our [homepage](https://internrobotics.github.io/internvla-n1.github.io/). -- [2025/09] Real-world deployment code of InternVLA-N1 is released.- [2025/09] Real-world deployment code of InternVLA-N1 is released. +- [2025/09] Real-world deployment code of InternVLA-N1 is released. - [2025/07] We are hosting 🏆IROS 2025 Grand Challenge, stay tuned at [official website](https://internrobotics.shlab.org.cn/challenge/2025/). - [2025/07] InternNav v0.1.1 released. diff --git a/challenge/onsite_competition/captures/rs_depth_mm.png b/challenge/onsite_competition/captures/rs_depth_mm.png deleted file mode 100644 index 7c46c358..00000000 Binary files a/challenge/onsite_competition/captures/rs_depth_mm.png and /dev/null differ diff --git a/challenge/onsite_competition/captures/rs_depth_vis.png b/challenge/onsite_competition/captures/rs_depth_vis.png deleted file mode 100644 index 85a629b8..00000000 Binary files a/challenge/onsite_competition/captures/rs_depth_vis.png and /dev/null differ diff --git a/internnav/habitat_extensions/evaluator.py b/internnav/projects/habitat_extensions/evaluator.py similarity index 100% rename from internnav/habitat_extensions/evaluator.py rename to internnav/projects/habitat_extensions/evaluator.py diff --git a/internnav/habitat_extensions/evaluator_single.py b/internnav/projects/habitat_extensions/evaluator_single.py similarity index 75% rename from internnav/habitat_extensions/evaluator_single.py rename to internnav/projects/habitat_extensions/evaluator_single.py index 26f01620..da2a1574 100644 --- a/internnav/habitat_extensions/evaluator_single.py +++ b/internnav/projects/habitat_extensions/evaluator_single.py @@ -1,55 +1,45 @@ import argparse -import os import copy -import json -from typing import Any -from collections import OrderedDict -from PIL import Image, ImageDraw, ImageFont -import numpy as np -import tqdm -import quaternion import itertools +import os import random import re -import cv2 import sys -import time +from collections import OrderedDict +from typing import Any +import cv2 +import habitat +import numpy as np +import quaternion import torch -from torch import Tensor -from transformers.image_utils import to_numpy_array from depth_camera_filtering import filter_depth - - -from omegaconf import OmegaConf -import habitat -from habitat import logger, Env +from habitat import Env from habitat.config.default import get_agent_config -from habitat_baselines.config.default import get_config as get_habitat_config -from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower from habitat.config.default_structured_configs import ( 
CollisionsMeasurementConfig, FogOfWarConfig, TopDownMapMeasurementConfig, ) +from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower from habitat.utils.visualizations.utils import images_to_video, observations_to_image -from transformers import ( - AutoTokenizer, - AutoProcessor, - Qwen2_5_VLForConditionalGeneration, -) - - +from habitat_baselines.config.default import get_config as get_habitat_config +from omegaconf import OmegaConf +from PIL import Image +from transformers import AutoProcessor +from transformers.image_utils import to_numpy_array PROJECT_ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(PROJECT_ROOT_PATH) print(f"PROJECT_ROOT_PATH {PROJECT_ROOT_PATH}") from internnav.model.basemodel.internvla_n1.internvla_n1 import InternVLAN1ForCausalLM -from internnav.model.utils.vln_utils import rho_theta, image_resize, traj_to_actions, chunk_token, split_and_clean, open_image -from internnav.habitat_extensions import measures +from internnav.model.utils.vln_utils import ( + chunk_token, + split_and_clean, + traj_to_actions, +) from internnav.utils.dist import * - DEFAULT_IMAGE_TOKEN = "" @@ -119,52 +109,53 @@ def __init__( camera_fov_rad = np.deg2rad(self.sim_sensors_config.depth_sensor.hfov) self._camera_fov = camera_fov_rad self._fx = self._fy = self.sim_sensors_config.depth_sensor.width / (2 * np.tan(camera_fov_rad / 2)) - + self.model = model self.processor = processor - - - prompt = f"You are an autonomous navigation assistant. Your task is to . Where should you go next to stay on track? Please output the next waypoint\'s coordinates in the image. Please output STOP when you have successfully completed the task." + prompt = f"You are an autonomous navigation assistant. Your task is to . Where should you go next to stay on track? Please output the next waypoint\'s coordinates in the image. Please output STOP when you have successfully completed the task." 
answer = "" self.conversation = [{"from": "human", "value": prompt}, {"from": "gpt", "value": answer}] - + self.conjunctions = [ - 'you can see ', - 'in front of you is ', - 'there is ', - 'you can spot ', - 'you are toward the ', - 'ahead of you is ', - 'in your sight is ' - ] - - self.actions2idx = OrderedDict({ - 'STOP': [0], - "↑": [1], - "←": [2], - "→": [3], - "↓": [5], - }) - + 'you can see ', + 'in front of you is ', + 'there is ', + 'you can spot ', + 'you are toward the ', + 'ahead of you is ', + 'in your sight is ', + ] + + self.actions2idx = OrderedDict( + { + 'STOP': [0], + "↑": [1], + "←": [2], + "→": [3], + "↓": [5], + } + ) + self.num_frames = args.num_frames self.num_future_steps = args.num_future_steps self.num_history = args.num_history - - - def preprocess_depth_image_v2(self, depth_image, do_depth_scale=True, depth_scale=1000, target_height=None, target_width=None): + + def preprocess_depth_image_v2( + self, depth_image, do_depth_scale=True, depth_scale=1000, target_height=None, target_width=None + ): if target_height is None: target_height = self.image_processor.crop_size['height'] # 384 - target_width = self.image_processor.crop_size['width'] # 384 - + target_width = self.image_processor.crop_size['width'] # 384 + resized_depth_image = depth_image.resize((target_width, target_height), Image.NEAREST) - + img = to_numpy_array(resized_depth_image) if do_depth_scale: img = img / depth_scale - + return img, (target_width, target_height) - + def get_intrinsic_matrix(self, sensor_cfg) -> np.ndarray: width = sensor_cfg.width height = sensor_cfg.height @@ -174,19 +165,16 @@ def get_intrinsic_matrix(self, sensor_cfg) -> np.ndarray: cx = (width - 1.0) / 2.0 cy = (height - 1.0) / 2.0 - intrinsic_matrix = np.array([ - [fx, 0.0, cx, 0.0], - [ 0.0, fy, cy, 0.0], - [ 0.0, 0.0, 1.0, 0.0], - [ 0.0, 0.0, 0.0, 1.0] - ]) + intrinsic_matrix = np.array( + [[fx, 0.0, cx, 0.0], [0.0, fy, cy, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]] + ) return intrinsic_matrix - + def preprocess_instrinsic(self, intrinsic, ori_size, target_size): # (V, 4, 4) (resize_shape) (h, w) intrinsic = copy.deepcopy(intrinsic) if len(intrinsic.shape) == 2: intrinsic = intrinsic[None, :, :] # (1, 4, 4) or (B, 4, 4) - + intrinsic[:, 0] /= ori_size[0] / target_size[0] # width intrinsic[:, 1] /= ori_size[1] / target_size[1] # height @@ -197,11 +185,11 @@ def preprocess_instrinsic(self, intrinsic, ori_size, target_size): # (V, 4, 4) intrinsic = intrinsic.squeeze(0) return intrinsic - + def get_axis_align_matrix(self): ma = np.array([[0, 0, 1, 0], [-1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]]) return ma - + def xyz_yaw_to_tf_matrix(self, xyz: np.ndarray, yaw: float) -> np.ndarray: x, y, z = xyz transformation_matrix = np.array( @@ -225,7 +213,7 @@ def xyz_pitch_to_tf_matrix(self, xyz: np.ndarray, pitch: float) -> np.ndarray: ] ) return transformation_matrix - + def xyz_yaw_pitch_to_tf_matrix(self, xyz: np.ndarray, yaw: float, pitch: float) -> np.ndarray: """Converts a given position and yaw, pitch angles to a 4x4 transformation matrix. 
@@ -243,7 +231,7 @@ def xyz_yaw_pitch_to_tf_matrix(self, xyz: np.ndarray, yaw: float, pitch: float) transformation_matrix[:3, :3] = rot1 @ rot2 transformation_matrix[:3, 3] = xyz return transformation_matrix - + def pixel_to_gps(self, pixel, depth, intrinsic, tf_camera_to_episodic): ''' Args: @@ -269,15 +257,15 @@ def pixel_to_gps(self, pixel, depth, intrinsic, tf_camera_to_episodic): x = point_episodic[0] y = point_episodic[1] - return (x, y) # same as habitat gps - + return (x, y) # same as habitat gps + def config_env(self) -> Env: env = Env(config=self.config) # env.episodes = env.episodes[0:1] return env - + def run_single_eval(self): - + self.model.eval() self.env = self.config_env() self.scene_episode_dict = {} @@ -285,31 +273,41 @@ def run_single_eval(self): if episode.scene_id not in self.scene_episode_dict: self.scene_episode_dict[episode.scene_id] = [] self.scene_episode_dict[episode.scene_id].append(episode) - intrinsic_matrix = self.get_intrinsic_matrix(self.config.habitat.simulator.agents.main_agent.sim_sensors.rgb_sensor) + intrinsic_matrix = self.get_intrinsic_matrix( + self.config.habitat.simulator.agents.main_agent.sim_sensors.rgb_sensor + ) sucs, spls, oss, nes = [], [], [], [] done_res = [] - if True: # fixme + if True: # fixme scenes_keys = list(sorted(self.scene_episode_dict.keys())) self.infer_success = False self.infer_data_ready = False - print('---------------current infer scence:', scenes_keys[self.infer_scene_id]) + print('---------------current infer scene:', scenes_keys[self.infer_scene_id]) selected_scenes = ['17DRP5sb8fy', 'r1Q1Z4BcV1o', 'dhjEzFoUFzH'] - key_name = 'data/scene_datasets/mp3d/' + selected_scenes[self.infer_scene_id] + '/' + selected_scenes[self.infer_scene_id] + '.glb' + key_name = ( + 'data/scene_datasets/mp3d/' + + selected_scenes[self.infer_scene_id] + + '/' + + selected_scenes[self.infer_scene_id] + + '.glb' + ) episodes = self.scene_episode_dict[key_name] step_size = len(episodes) // 6 # episode_id = 0 - + episode = episodes[self.infer_episode_id * step_size] - episode_instruction = self.infer_instruction# episode.instruction.instruction_text if 'objectnav' not in self.config_path else episode.object_category + episode_instruction = ( + self.infer_instruction + ) # episode.instruction.instruction_text if 'objectnav' not in self.config_path else episode.object_category print("episode start", episode_instruction) episode_id = int(episode.episode_id) env = self.env env.current_episode = episode observations = env.reset() - + agent_state = env.sim.get_agent_state() rotation = agent_state.rotation translation = agent_state.position @@ -317,16 +315,14 @@ def run_single_eval(self): transformation_matrix = np.eye(4) transformation_matrix[:3, :3] = rotation_matrix transformation_matrix[:3, 3] = translation - - agent = ShortestPathFollower( - env.sim, 0.25, False - ) - + + agent = ShortestPathFollower(env.sim, 0.25, False) + os.makedirs(os.path.join(self.output_path, f'check_sim_{self.epoch}'), exist_ok=True) - + vis_frames = [] step_id = 0 - + if self.save_video: os.makedirs(self.output_path, exist_ok=True) initial_height = env.sim.get_agent_state().position[1] @@ -336,17 +332,17 @@ def run_single_eval(self): action_seq = [] past_key_values = None output_ids = None - + goal = None action = None look_down_observations = None look_down_id_list = [] messages = [] - last_action=None + last_action = None local_actions = [] - + # begin evaluation main loop - while not env.episode_over and step_id <=70: + while not env.episode_over and step_id <= 70: rgb 
= observations["rgb"] depth = observations["depth"] x, y = observations["gps"] @@ -356,23 +352,32 @@ def run_single_eval(self): depth = depth * 1000 agent_state = env.sim.get_agent_state() - height = agent_state.position[1] - initial_height # Habitat GPS makes west negative, so flip y + height = agent_state.position[1] - initial_height # Habitat GPS makes west negative, so flip y camera_position = np.array([x, -y, self._camera_height + height]) robot_xy = camera_position[:2] # tf_camera_to_episodic = self.xyz_yaw_to_tf_matrix(camera_position, camera_yaw) @ self.get_axis_align_matrix() - tf_camera_to_episodic = self.xyz_yaw_pitch_to_tf_matrix(camera_position, camera_yaw, np.deg2rad(30)) @ self.get_axis_align_matrix() - - image = Image.fromarray(rgb).convert('RGB') # raw observation image - image_size = image.size #640*480 + tf_camera_to_episodic = ( + self.xyz_yaw_pitch_to_tf_matrix(camera_position, camera_yaw, np.deg2rad(30)) + @ self.get_axis_align_matrix() + ) + + image = Image.fromarray(rgb).convert('RGB') # raw observation image + image_size = image.size # 640*480 save_raw_image = image.copy() - + if action == 5: - look_down_image = image #Image.fromarray(look_down_observations['rgb']).convert('RGB') + look_down_image = image # Image.fromarray(look_down_observations['rgb']).convert('RGB') save_raw_image = look_down_image.copy() # rgb_list.append(look_down_image) - look_down_depth, resize_shape = self.preprocess_depth_image_v2(Image.fromarray(depth.astype(np.uint16), mode='I;16'), do_depth_scale=True, depth_scale=1000, target_height=224, target_width=224) - look_down_depth = torch.as_tensor(np.ascontiguousarray(look_down_depth)).float() # [H, W] + look_down_depth, resize_shape = self.preprocess_depth_image_v2( + Image.fromarray(depth.astype(np.uint16), mode='I;16'), + do_depth_scale=True, + depth_scale=1000, + target_height=224, + target_width=224, + ) + look_down_depth = torch.as_tensor(np.ascontiguousarray(look_down_depth)).float() # [H, W] ### depth clip to 5m look_down_depth[look_down_depth > 5.0] = 5.0 else: @@ -387,131 +392,131 @@ def run_single_eval(self): depth = filter_depth(depth.reshape(depth.shape[:2]), blur_type=None) depth = depth * (self._max_depth - self._min_depth) + self._min_depth depth = depth * 1000 - look_down_depth, resize_shape = self.preprocess_depth_image_v2(Image.fromarray(depth.astype(np.uint16), mode='I;16'), do_depth_scale=True, depth_scale=1000, target_height=224, target_width=224) - look_down_depth = torch.as_tensor(np.ascontiguousarray(look_down_depth)).float() # [H, W] + look_down_depth, resize_shape = self.preprocess_depth_image_v2( + Image.fromarray(depth.astype(np.uint16), mode='I;16'), + do_depth_scale=True, + depth_scale=1000, + target_height=224, + target_width=224, + ) + look_down_depth = torch.as_tensor(np.ascontiguousarray(look_down_depth)).float() # [H, W] ### depth clip to 5m look_down_depth[look_down_depth > 5.0] = 5.0 env.step(4) env.step(4) - info = env.get_metrics() current_frame_infer_pixel = False if len(action_seq) == 0 and goal is None: # only run model inference again after every action in the previously predicted action_seq has been executed - if action != 5: + if action != 5: sources = copy.deepcopy(self.conversation) if 'objectnav' in self.config_path: - sources[0]["value"] = sources[0]["value"].replace('.', random.choice(self.objectnav_instructions).format(target_object=episode.object_category.replace('_', ' '))) - else: - sources[0]["value"] = sources[0]["value"].replace('.', episode_instruction[:-1]) - cur_images = rgb_list[-1:] # current observation + sources[0]["value"] =
sources[0]["value"].replace( + '.', + random.choice(self.objectnav_instructions).format( + target_object=episode.object_category.replace('_', ' ') + ), + ) + else: + sources[0]["value"] = sources[0]["value"].replace( + '.', episode_instruction[:-1] + ) + cur_images = rgb_list[-1:] # current observation if step_id == 0: history_id = [] else: - history_id = np.unique(np.linspace(0, step_id - 1, self.num_history, dtype=np.int32)).tolist() + history_id = np.unique( + np.linspace(0, step_id - 1, self.num_history, dtype=np.int32) + ).tolist() placeholder = (DEFAULT_IMAGE_TOKEN + '\n') * len(history_id) sources[0]["value"] += f' These are your historical observations: {placeholder}.' - + history_id = sorted(history_id) print('history_idddddddd', step_id, history_id) input_images = [rgb_list[i] for i in history_id] + cur_images input_img_id = 0 - else: - assert action == 5 # last action is look down + else: + assert action == 5 # last action is look down sources = [{"from": "human", "value": ""}, {"from": "gpt", "value": ""}] input_images += [look_down_image] - messages.append({ - 'role': 'assistant', - 'content': [ - { - 'type': 'text', - 'text': llm_outputs - } - ] - }) + messages.append({'role': 'assistant', 'content': [{'type': 'text', 'text': llm_outputs}]}) input_img_id = -1 - + prompt = random.choice(self.conjunctions) + DEFAULT_IMAGE_TOKEN sources[0]["value"] += f" {prompt}." print('sources', step_id, sources) prompt_instruction = copy.deepcopy(sources[0]["value"]) parts = split_and_clean(prompt_instruction) - + content = [] - for i in range (len(parts)): + for i in range(len(parts)): if parts[i] == "": content.append({"type": "image", "image": input_images[input_img_id]}) - input_img_id +=1 + input_img_id += 1 else: - content.append({"type": "text", "text": parts[i]}) - - messages.append({ - 'role': 'user', - 'content': content - }) - + content.append({"type": "text", "text": parts[i]}) + + messages.append({'role': 'user', 'content': content}) + print('step_id', step_id, 'messages:', messages) - - text = self.processor.apply_chat_template( - messages,tokenize=False, add_generation_prompt=True - ) + + text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) inputs = self.processor(text=[text], images=input_images, return_tensors="pt").to(self.model.device) with torch.no_grad(): output_ids = self.model.generate(**inputs, max_new_tokens=128, do_sample=False) - - llm_outputs = self.processor.tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True) + llm_outputs = self.processor.tokenizer.decode( + output_ids[0][inputs.input_ids.shape[1] :], skip_special_tokens=True + ) print('step_id:', step_id, 'output text:', llm_outputs) - - if bool(re.search(r'\d', llm_outputs)): # output pixel goal + + if bool(re.search(r'\d', llm_outputs)): # output pixel goal current_frame_infer_pixel = True forward_action = 0 coord = [int(c) for c in re.findall(r'\d+', llm_outputs)] pixel_goal = [int(coord[1]), int(coord[0])] # switch the goal o - + goal = self.pixel_to_gps(pixel_goal, depth / 1000, intrinsic_matrix, tf_camera_to_episodic) print('before', goal, depth.shape) - goal = (transformation_matrix @ np.array([-goal[1], 0, -goal[0], 1]))[:3] - + goal = (transformation_matrix @ np.array([-goal[1], 0, -goal[0], 1]))[:3] + if not env.sim.pathfinder.is_navigable(np.array(goal)): goal = np.array(env.sim.pathfinder.snap_point(np.array(goal))) - + # look down --> horizontal env.step(4) env.step(4) - + # action = agent.get_next_action(goal) 
local_actions = [] pixel_values = inputs.pixel_values - image_grid_thw = torch.cat( - [thw.unsqueeze(0) for thw in inputs.image_grid_thw], dim=0 - ) - + image_grid_thw = torch.cat([thw.unsqueeze(0) for thw in inputs.image_grid_thw], dim=0) + with torch.no_grad(): traj_latents = self.model.generate_latents(output_ids, pixel_values, image_grid_thw) - + image_dp = torch.tensor(np.array(look_down_image.resize((224, 224)))).to(torch.bfloat16) pix_goal_image = copy.copy(image_dp) images_dp = torch.stack([pix_goal_image, image_dp]).unsqueeze(0) depth_dp = look_down_depth.unsqueeze(-1).to(torch.bfloat16) pix_goal_depth = copy.copy(depth_dp) depths_dp = torch.stack([pix_goal_depth, depth_dp]).unsqueeze(0) - + with torch.no_grad(): - dp_actions = self.model.generate_traj(traj_latents) - + dp_actions = self.model.generate_traj(traj_latents) + random_choice = np.random.choice(dp_actions.shape[0]) if self.args.continuous_traj: action_list = traj_to_actions(dp_actions) if len(action_list) < 8: - action_list += [0] * (8-len(action_list)) + action_list += [0] * (8 - len(action_list)) else: action_list = chunk_token(dp_actions[random_choice]) print("first action_list", action_list) - local_actions = action_list + local_actions = action_list action = local_actions[0] if action == 0: goal = None @@ -524,10 +529,10 @@ def run_single_eval(self): continue print('predicted goal', pixel_goal, goal, flush=True) - else: + else: action_seq = self.parse_actions(llm_outputs) print('actions', action_seq, flush=True) - + if len(action_seq) != 0: action = action_seq[0] action_seq.pop(0) @@ -536,14 +541,14 @@ def run_single_eval(self): action = local_actions.pop(0) else: action = 0 - forward_action +=1 + forward_action += 1 print('forward_action', forward_action, flush=True) if forward_action > 8: - goal = None + goal = None output_ids = None messages = [] step_id += 1 - forward_action =0 + forward_action = 0 local_actions = [] continue if action == 0: @@ -551,28 +556,38 @@ def run_single_eval(self): output_ids = None messages = [] step_id += 1 - forward_action =0 + forward_action = 0 local_actions = [] continue else: action = 0 - + if info['top_down_map'] is not None: frame = observations_to_image({'rgb': np.asarray(save_raw_image)}, info) if current_frame_infer_pixel: - frame = cv2.putText(frame, f"{pixel_goal[1], pixel_goal[0]}", (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2) + frame = cv2.putText( + frame, + f"{pixel_goal[1], pixel_goal[0]}", + (50, 80), + cv2.FONT_HERSHEY_SIMPLEX, + 1, + (255, 0, 0), + 2, + ) frame = cv2.circle(frame, (pixel_goal[1], pixel_goal[0]), 5, (255, 0, 0), -1) else: output_str = str(action) - output_str = output_str.replace('1', "Go forward").replace('2', 'Turn left').replace('3', 'Turn right') + output_str = ( + output_str.replace('1', "Go forward").replace('2', 'Turn left').replace('3', 'Turn right') + ) output_str = output_str.replace('5', 'Look down').replace('0', 'Stop!') frame = cv2.putText(frame, output_str, (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2) vis_frames.append(frame) if action == 5: vis_frames.append(frame) - + print("step_id", step_id, "action", action) - + if action == 5: env.step(action) observations = env.step(action) @@ -580,19 +595,23 @@ def run_single_eval(self): observations = env.step(action) step_id += 1 messages = [] - + self.infer_success_cnt += 1 - + metrics = env.get_metrics() - if self.save_video : - images_to_video( - vis_frames, self.output_path, f"res_{self.infer_success_cnt}",fps=6, quality=9 - ) + if self.save_video: + 
images_to_video(vis_frames, self.output_path, f"res_{self.infer_success_cnt}", fps=6, quality=9) self.infer_success = True vis_frames.clear() env.close() - return torch.tensor(sucs).to(self.device), torch.tensor(spls).to(self.device), torch.tensor(oss).to(self.device), torch.tensor(nes).to(self.device), torch.tensor(len(sucs)).to(self.device) + return ( + torch.tensor(sucs).to(self.device), + torch.tensor(spls).to(self.device), + torch.tensor(oss).to(self.device), + torch.tensor(nes).to(self.device), + torch.tensor(len(sucs)).to(self.device), + ) def parse_actions(self, output): action_patterns = '|'.join(re.escape(action) for action in self.actions2idx) @@ -607,13 +626,14 @@ def preprocess_qwenvl(self, source): prompt = random.choice(self.conjunctions) + DEFAULT_IMAGE_TOKEN if len(source[0]["value"]) != 0: source[0]["value"] += f" {prompt}." - else: - source[0]["value"] = f"{prompt}." # Please output the next waypoint\'s coordinates in the image." + else: + source[0]["value"] = f"{prompt}." # Please output the next waypoint\'s coordinates in the image." return source - + + def eval(): global local_rank - + parser = argparse.ArgumentParser() parser.add_argument("--local_rank", default=0, type=int, help="node rank") parser.add_argument("--model_path", type=str, default="") @@ -629,18 +649,14 @@ def eval(): parser.add_argument("--predict_step_nums", type=int, default=16) parser.add_argument("--continuous_traj", action="store_true", default=False) parser.add_argument("--max_new_tokens", type=int, default=1024) - - parser.add_argument('--world_size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--rank', default=0, type=int, - help='rank') - parser.add_argument('--gpu', default=0, type=int, - help='gpu') + + parser.add_argument('--world_size', default=1, type=int, help='number of distributed processes') + parser.add_argument('--rank', default=0, type=int, help='rank') + parser.add_argument('--gpu', default=0, type=int, help='gpu') parser.add_argument('--port', default='2333') parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training') - parser.add_argument('--device', default='cuda', - help='device to use for training / testing') - + parser.add_argument('--device', default='cuda', help='device to use for training / testing') + args = parser.parse_args() init_distributed_mode(args) local_rank = args.local_rank @@ -651,10 +667,9 @@ def eval(): device = torch.device(f"cuda:{local_rank}") model = InternVLAN1ForCausalLM.from_pretrained( - args.model_path, torch_dtype=torch.bfloat16, - attn_implementation="flash_attention_2", device_map={"": device} + args.model_path, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", device_map={"": device} ) - + # start the evaluation evaluate(model, processor, args) @@ -670,9 +685,9 @@ def evaluate(model, processor, args): model=model, processor=processor, epoch=0, - args=args + args=args, ) - sucs, spls, oss, nes, ep_num = evaluator.eval_action(idx=get_rank()) + sucs, spls, oss, nes, ep_num = evaluator.eval_action(idx=get_rank()) if __name__ == "__main__": diff --git a/internnav/habitat_extensions/fonts/arial.ttf b/internnav/projects/habitat_extensions/fonts/arial.ttf similarity index 100% rename from internnav/habitat_extensions/fonts/arial.ttf rename to internnav/projects/habitat_extensions/fonts/arial.ttf diff --git a/internnav/habitat_extensions/maps.py b/internnav/projects/habitat_extensions/maps.py similarity index 90% rename from 
internnav/habitat_extensions/maps.py rename to internnav/projects/habitat_extensions/maps.py index 2ba9112a..68f62d73 100644 --- a/internnav/habitat_extensions/maps.py +++ b/internnav/projects/habitat_extensions/maps.py @@ -28,9 +28,7 @@ MAP_SHORTEST_PATH_WAYPOINT = 14 TOP_DOWN_MAP_COLORS = np.full((256, 3), 150, dtype=np.uint8) -TOP_DOWN_MAP_COLORS[15:] = cv2.applyColorMap( - np.arange(241, dtype=np.uint8), cv2.COLORMAP_JET -).squeeze(1)[:, ::-1] +TOP_DOWN_MAP_COLORS[15:] = cv2.applyColorMap(np.arange(241, dtype=np.uint8), cv2.COLORMAP_JET).squeeze(1)[:, ::-1] TOP_DOWN_MAP_COLORS[MAP_INVALID_POINT] = [255, 255, 255] # White TOP_DOWN_MAP_COLORS[MAP_VALID_POINT] = [150, 150, 150] # Light Grey TOP_DOWN_MAP_COLORS[MAP_BORDER_INDICATOR] = [50, 50, 50] # Grey @@ -73,9 +71,7 @@ def colorize_top_down_map( # Only desaturate valid points as only valid points get revealed desat_mask = top_down_map != MAP_INVALID_POINT - _map[desat_mask] = ( - _map * fog_of_war_desat_values[fog_of_war_mask] - ).astype(np.uint8)[desat_mask] + _map[desat_mask] = (_map * fog_of_war_desat_values[fog_of_war_mask]).astype(np.uint8)[desat_mask] return _map @@ -205,9 +201,7 @@ def draw_reference_path( pt_from = pt_to for pt in shortest_path_points: - drawpoint( - img, (pt[1], pt[0]), MAP_SHORTEST_PATH_WAYPOINT, meters_per_px - ) + drawpoint(img, (pt[1], pt[0]), MAP_SHORTEST_PATH_WAYPOINT, meters_per_px) def draw_straight_shortest_path_points( @@ -219,10 +213,7 @@ def draw_straight_shortest_path_points( """Draws the shortest path from start to goal assuming a standard discrete action space. """ - shortest_path_points = [ - habitat_maps.to_grid(p[2], p[0], img.shape[0:2], sim)[::-1] - for p in shortest_path_points - ] + shortest_path_points = [habitat_maps.to_grid(p[2], p[0], img.shape[0:2], sim)[::-1] for p in shortest_path_points] habitat_maps.draw_path( img, @@ -232,9 +223,7 @@ def draw_straight_shortest_path_points( ) -def draw_source_and_target( - img: np.ndarray, sim: Simulator, episode: VLNEpisode, meters_per_px: float -) -> None: +def draw_source_and_target(img: np.ndarray, sim: Simulator, episode: VLNEpisode, meters_per_px: float) -> None: s_x, s_y = habitat_maps.to_grid( episode.start_position[2], episode.start_position[0], @@ -285,18 +274,14 @@ def get_nearest_node(graph: nx.Graph, current_position: List[float]) -> str: for node in graph: node_pos = graph.nodes[node]["position"] node_pos = np.take(node_pos, (0, 2)) - cur_dist = np.linalg.norm( - np.array(node_pos) - np.array(current_position), ord=2 - ) + cur_dist = np.linalg.norm(np.array(node_pos) - np.array(current_position), ord=2) if cur_dist < dist: dist = cur_dist nearest = node return nearest -def update_nearest_node( - graph: nx.Graph, nearest_node: str, current_position: np.ndarray -) -> str: +def update_nearest_node(graph: nx.Graph, nearest_node: str, current_position: np.ndarray) -> str: """Determine the closest MP3D node to the agent's current position as given by a [x,z] position vector. The selected node must be reachable from the previous MP3D node as specified in the nav-graph edges. 
@@ -309,9 +294,7 @@ def update_nearest_node( for node in [nearest_node] + [e[1] for e in graph.edges(nearest_node)]: node_pos = graph.nodes[node]["position"] node_pos = np.take(node_pos, (0, 2)) - cur_dist = np.linalg.norm( - np.array(node_pos) - np.array(current_position), ord=2 - ) + cur_dist = np.linalg.norm(np.array(node_pos) - np.array(current_position), ord=2) if cur_dist < dist: dist = cur_dist nearest = node @@ -325,18 +308,14 @@ def draw_mp3d_nodes( graph: nx.Graph, meters_per_px: float, ) -> None: - n = get_nearest_node( - graph, (episode.start_position[0], episode.start_position[2]) - ) + n = get_nearest_node(graph, (episode.start_position[0], episode.start_position[2])) starting_height = graph.nodes[n]["position"][1] for node in graph: pos = graph.nodes[node]["position"] # no obvious way to differentiate between floors. Use this for now. if abs(pos[1] - starting_height) < 1.0: - r_x, r_y = habitat_maps.to_grid( - pos[2], pos[0], img.shape[0:2], sim - ) + r_x, r_y = habitat_maps.to_grid(pos[2], pos[0], img.shape[0:2], sim) # only paint if over a valid point if img[r_x, r_y]: @@ -388,4 +367,4 @@ def image_resize( img = img.permute(0, 1, 3, 4, 2) if no_batch_dim: img = img.squeeze(dim=0) # Removes the batch dimension - return img \ No newline at end of file + return img diff --git a/internnav/habitat_extensions/measures.py b/internnav/projects/habitat_extensions/measures.py similarity index 94% rename from internnav/habitat_extensions/measures.py rename to internnav/projects/habitat_extensions/measures.py index 048d4473..751bbf00 100644 --- a/internnav/habitat_extensions/measures.py +++ b/internnav/projects/habitat_extensions/measures.py @@ -1,23 +1,15 @@ -import gzip -import json -import pickle from typing import Any, List, Union import numpy as np + # from dtw import dtw # from fastdtw import fastdtw # from habitat.config import Config -from habitat.core.dataset import Episode -from habitat.core.embodied_task import Action, EmbodiedTask, Measure -from habitat.core.logging import logger +from habitat.core.embodied_task import EmbodiedTask, Measure from habitat.core.registry import registry from habitat.core.simulator import Simulator from habitat.core.utils import try_cv2_import -from habitat.tasks.nav.nav import DistanceToGoal, Success -from habitat.tasks.utils import cartesian_to_polar -from habitat.utils.geometry_utils import quaternion_rotate_vector -from habitat.utils.visualizations import fog_of_war -from habitat.utils.visualizations import maps as habitat_maps +from habitat.tasks.nav.nav import DistanceToGoal from numpy import ndarray # from utils import maps @@ -26,9 +18,7 @@ cv2 = try_cv2_import() -def euclidean_distance( - pos_a: Union[List[float], ndarray], pos_b: Union[List[float], ndarray] -) -> float: +def euclidean_distance(pos_a: Union[List[float], ndarray], pos_b: Union[List[float], ndarray]) -> float: return np.linalg.norm(np.array(pos_b) - np.array(pos_a), ord=2) @@ -54,9 +44,7 @@ def reset_metric(self, *args: Any, **kwargs: Any): def update_metric(self, *args: Any, **kwargs: Any): current_position = self._sim.get_agent_state().position - self._metric += euclidean_distance( - current_position, self._previous_position - ) + self._metric += euclidean_distance(current_position, self._previous_position) self._previous_position = current_position @@ -73,16 +61,12 @@ def _get_uuid(self, *args: Any, **kwargs: Any) -> str: return self.cls_uuid def reset_metric(self, *args: Any, task: EmbodiedTask, **kwargs: Any): - task.measurements.check_measure_dependencies( - 
self.uuid, [DistanceToGoal.cls_uuid] - ) + task.measurements.check_measure_dependencies(self.uuid, [DistanceToGoal.cls_uuid]) self._metric = float("inf") self.update_metric(task=task) def update_metric(self, *args: Any, task: EmbodiedTask, **kwargs: Any): - distance_to_target = task.measurements.measures[ - DistanceToGoal.cls_uuid - ].get_metric() + distance_to_target = task.measurements.measures[DistanceToGoal.cls_uuid].get_metric() self._metric = min(self._metric, distance_to_target) @@ -105,9 +89,7 @@ def _get_uuid(self, *args: Any, **kwargs: Any) -> str: return self.cls_uuid def reset_metric(self, *args: Any, task: EmbodiedTask, **kwargs: Any): - task.measurements.check_measure_dependencies( - self.uuid, [DistanceToGoal.cls_uuid] - ) + task.measurements.check_measure_dependencies(self.uuid, [DistanceToGoal.cls_uuid]) self._metric = 0.0 self.update_metric(task=task) diff --git a/scripts/eval/navigation_ui.py b/scripts/demo/navigation_ui.py similarity index 77% rename from scripts/eval/navigation_ui.py rename to scripts/demo/navigation_ui.py index 73d522f2..2f13bfda 100644 --- a/scripts/eval/navigation_ui.py +++ b/scripts/demo/navigation_ui.py @@ -1,32 +1,19 @@ -import base64 import json -import logging import os -import subprocess -import sys -import threading import time import uuid +from collections import defaultdict from datetime import datetime, timedelta -from typing import Dict, List, Optional +from typing import Optional import gradio as gr -import numpy as np -import open3d as o3d -import plotly.graph_objects as go import requests -from fastapi import APIRouter, FastAPI, HTTPException, status, BackgroundTasks, Response -from pydantic import BaseModel - -import asyncio -import uvicorn -from collections import defaultdict -BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8001") # fastapi server +BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8001") # fastapi server API_ENDPOINTS = { "submit_task": f"{BACKEND_URL}/predict/video", "query_status": f"{BACKEND_URL}/predict/task", - "get_result": f"{BACKEND_URL}/predict" + "get_result": f"{BACKEND_URL}/predict", } @@ -34,17 +21,17 @@ "scene_1": { "description": "Modern Apartment", "name": "17DRP5sb8fy", - "glb_path": "scene_assets/scene1_no_ceiling.glb" # PLY file path + "glb_path": "scene_assets/scene1_no_ceiling.glb", # PLY file path }, "scene_2": { "description": "Office Building", "name": "r1Q1Z4BcV1o", - "glb_path": "scene_assets/scene2_no_ceiling.glb" + "glb_path": "scene_assets/scene2_no_ceiling.glb", }, "scene_3": { "description": "University Campus", "name": "dhjEzFoUFzH", - "glb_path": "scene_assets/scene3_no_ceiling.glb" + "glb_path": "scene_assets/scene3_no_ceiling.glb", }, } @@ -60,17 +47,18 @@ }, "episode_4": { "description": "4", - } + }, } -MODEL_CHOICES = [] +MODEL_CHOICES = [] ############################################################################### SESSION_TASKS = {} IP_REQUEST_RECORDS = defaultdict(list) -IP_LIMIT = 5 +IP_LIMIT = 5 + def is_request_allowed(ip: str) -> bool: now = datetime.now() @@ -80,6 +68,7 @@ def is_request_allowed(ip: str) -> bool: return True return False + ############################################################################### @@ -89,18 +78,20 @@ def is_request_allowed(ip: str) -> bool: ACCESS_LOG = os.path.join(LOG_DIR, "access.log") SUBMISSION_LOG = os.path.join(LOG_DIR, "submissions.log") + def log_access(user_ip: str = None, user_agent: str = None): timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") log_entry = { "timestamp": timestamp, "type": 
"access", "user_ip": user_ip or "unknown", - "user_agent": user_agent or "unknown" + "user_agent": user_agent or "unknown", } - + with open(ACCESS_LOG, "a") as f: f.write(json.dumps(log_entry) + "\n") + def log_submission(scene: str, prompt: str, model: str, user: str = "anonymous", res: str = "unknown"): timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") log_entry = { @@ -110,16 +101,17 @@ def log_submission(scene: str, prompt: str, model: str, user: str = "anonymous", "scene": scene, "prompt": prompt, "model": model, - #"max_step": str(max_step), - "res": res + # "max_step": str(max_step), + "res": res, } - + with open(SUBMISSION_LOG, "a") as f: f.write(json.dumps(log_entry) + "\n") + def read_logs(log_type: str = "all", max_entries: int = 50) -> list: logs = [] - + if log_type in ["all", "access"]: try: with open(ACCESS_LOG, "r") as f: @@ -127,7 +119,7 @@ def read_logs(log_type: str = "all", max_entries: int = 50) -> list: logs.append(json.loads(line.strip())) except FileNotFoundError: pass - + if log_type in ["all", "submission"]: try: with open(SUBMISSION_LOG, "r") as f: @@ -135,36 +127,37 @@ def read_logs(log_type: str = "all", max_entries: int = 50) -> list: logs.append(json.loads(line.strip())) except FileNotFoundError: pass - - # Sorted by timestemp + + # Sorted by timestamp logs.sort(key=lambda x: x["timestamp"], reverse=True) return logs[:max_entries] + def format_logs_for_display(logs: list) -> str: if not logs: return "No log record" - + markdown = "### System Access Log\n\n" markdown += "| Time | Type | User/IP | Details |\n" markdown += "|------|------|---------|----------|\n" - + for log in logs: timestamp = log.get("timestamp", "unknown") log_type = "Access" if log.get("type") == "access" else "Submission" - + if log_type == "Access": user = log.get("user_ip", "unknown") details = f"User-Agent: {log.get('user_agent', 'unknown')}" else: user = log.get("user", "anonymous") result = log.get('res', 'unknown') - if result != "success": + if result != "success": if len(result) > 40: # Adjust this threshold as needed result = f"{result[:20]}...{result[-20:]}" details = f"Scene: {log.get('scene', 'unknown')}, Prompt: {log.get('prompt', '')}, Model: {log.get('model', 'unknown')}, result: {result}" - + markdown += f"| {timestamp} | {log_type} | {user} | {details} |\n" - + return markdown @@ -185,31 +178,22 @@ def submit_to_backend( "scene_index": scene_index, "episode_index": episode_index, } - - payload = { - "user": user, - "task": "robot_navigation", - "job_id": job_id, - "data": json.dumps(data) - } - + + payload = {"user": user, "task": "robot_navigation", "job_id": job_id, "data": json.dumps(data)} + try: headers = {"Content-Type": "application/json"} - response = requests.post( - API_ENDPOINTS["submit_task"], - json=payload, - headers=headers, - timeout=600 - ) + response = requests.post(API_ENDPOINTS["submit_task"], json=payload, headers=headers, timeout=600) return response.json() except Exception as e: return {"status": "error", "message": str(e)} + def get_task_status(task_id: str) -> dict: try: response = requests.get(f"{API_ENDPOINTS['query_status']}/{task_id}", timeout=600) try: - return response.json() + return response.json() except json.JSONDecodeError: return {"status": "error", "message": response.text} except Exception as e: @@ -218,22 +202,14 @@ def get_task_status(task_id: str) -> dict: def get_task_result(task_id: str) -> Optional[dict]: try: - response = requests.get( - f"{API_ENDPOINTS['get_result']}/{task_id}", - timeout=5 - ) + response = 
requests.get(f"{API_ENDPOINTS['get_result']}/{task_id}", timeout=5) return response.json() except Exception as e: print(f"Error fetching result: {e}") return None -def run_simulation( - scene: str, - prompt: str, - episode: str, - history: list, - request: gr.Request -) -> dict: + +def run_simulation(scene: str, prompt: str, episode: str, history: list, request: gr.Request) -> dict: model = "InternNav-VLA" timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -245,14 +221,14 @@ def run_simulation( if not is_request_allowed(user_ip): log_submission(scene, prompt, model, user_ip, "IP blocked temporarily") raise gr.Error("Too many requests from this IP. Please wait and try again one minute later.") - + submission_result = submit_to_backend(scene, prompt, episode) print("submission_result: ", submission_result) if submission_result.get("status") != "pending": log_submission(scene, prompt, model, user_ip, "Submission failed") raise gr.Error(f"Submission failed: {submission_result.get('message', 'unknown issue')}") - + try: task_id = submission_result["task_id"] SESSION_TASKS[session_id] = task_id @@ -276,24 +252,19 @@ def run_simulation( time.sleep(1) if status.get("status") == "completed": import base64 + video_bytes = base64.b64decode(status.get("video")) receive_time = time.time() with open(f"received_video_{receive_time}.mp4", "wb") as f: f.write(video_bytes) video_path = f"received_video_{receive_time}.mp4" - new_entry = { - "timestamp": timestamp, - "scene": scene, - "model": model, - "prompt": prompt, - "video_path": video_path - } - + new_entry = {"timestamp": timestamp, "scene": scene, "model": model, "prompt": prompt, "video_path": video_path} + updated_history = history + [new_entry] - + if len(updated_history) > 10: updated_history = updated_history[:10] - + print("updated_history:", updated_history) log_submission(scene, prompt, model, user_ip, "success") gr.Info("Simulation completed successfully!") @@ -316,32 +287,42 @@ def run_simulation( log_submission(scene, prompt, model, user_ip, "missing task's status from backend") yield None, history + ################################################################################################################### def update_history_display(history: list) -> list: print("update_history_display") updates = [] - + for i in range(10): if i < len(history): entry = history[i] - updates.extend([ - gr.update(visible=True), - gr.update(visible=True, label=f"Simulation {i+1} scene: {entry['scene']}, prompt: {entry['prompt']}", open=False), - gr.update(value=entry['video_path'], visible=True), - gr.update(value=f"{entry['timestamp']}") - ]) + updates.extend( + [ + gr.update(visible=True), + gr.update( + visible=True, + label=f"Simulation {i+1} scene: {entry['scene']}, prompt: {entry['prompt']}", + open=False, + ), + gr.update(value=entry['video_path'], visible=True), + gr.update(value=f"{entry['timestamp']}"), + ] + ) print(f'update video') print(entry['video_path']) else: - updates.extend([ - gr.update(visible=False), - gr.update(visible=False), - gr.update(value=None, visible=False), - gr.update(value="") - ]) + updates.extend( + [ + gr.update(visible=False), + gr.update(visible=False), + gr.update(value=None, visible=False), + gr.update(value=""), + ] + ) print("update_history_display end!!") return updates + def update_scene_display(scene: str): print(f"update_scene_display {scene}") config = SCENE_CONFIGS.get(scene, {}) @@ -353,6 +334,7 @@ def update_scene_display(scene: str): return None, glb_path + def update_episode_display(scene: 
str, episode: str): print(f"update_episode_display {scene} {episode}") config = SCENE_CONFIGS.get(scene, {}) @@ -360,14 +342,18 @@ def update_episode_display(scene: str, episode: str): episode_id = int(episode[-1]) image_path = os.path.join("scene_assets", f"{scene_name}_{episode_id-1}.jpg") print(f"image_path {image_path}") - # vaild if file path exists + # valid if file path exists if not os.path.exists(image_path): return None return image_path + + def update_log_display(): logs = read_logs() return format_logs_for_display(logs) + + ############################################################################## @@ -382,7 +368,6 @@ def cleanup_session(request: gr.Request): print(f"Task Termination Failed: {task_id}: {e}") - ############################################################################### custom_css = """ @@ -419,11 +404,13 @@ def cleanup_session(request: gr.Request): """ with gr.Blocks(title="Robot Navigation Inference", css=custom_css) as demo: - gr.Markdown(""" + gr.Markdown( + """ # 🧭 Habitat Robot Navigation Demo ### Simulation Test Based on Habitat Framework - """) - + """ + ) + history_state = gr.State([]) with gr.Row(): @@ -435,7 +422,7 @@ def cleanup_session(request: gr.Request): choices=list(SCENE_CONFIGS.keys()), value="scene_1", interactive=True, - ) + ) episode_dropdown = gr.Dropdown( label="Select Start Position", choices=list(EPISODE_CONFIGS.keys()), @@ -444,41 +431,33 @@ def cleanup_session(request: gr.Request): ) with gr.Row(): - scene_preview = gr.Model3D(elem_classes=["scene-preview"], - camera_position=(90.0, 120, 20000.0), - #display_mode="solid" + scene_preview = gr.Model3D( + elem_classes=["scene-preview"], + camera_position=(90.0, 120, 20000.0), + # display_mode="solid" ) fps_preview = gr.Image(label="FPS Preview") - + scene_description = gr.Markdown("### Scene preview") - + prompt_input = gr.Textbox( label="Navigation Instruction", value="Exit the bedroom and turn left. Walk straight passing the gray couch and stop near the rug.", placeholder="e.g.: 'Exit the bedroom and turn left. 
Walk straight passing the gray couch and stop near the rug.'", lines=2, - max_lines=4 + max_lines=4, ) - + scene_dropdown.change( - update_scene_display, - inputs=scene_dropdown, - outputs=[scene_description, scene_preview] - ).then( - update_episode_display, - inputs=[scene_dropdown, episode_dropdown], - outputs=[fps_preview] - ) - + update_scene_display, inputs=scene_dropdown, outputs=[scene_description, scene_preview] + ).then(update_episode_display, inputs=[scene_dropdown, episode_dropdown], outputs=[fps_preview]) + episode_dropdown.change( - update_episode_display, - inputs=[scene_dropdown, episode_dropdown], - outputs=[fps_preview] + update_episode_display, inputs=[scene_dropdown, episode_dropdown], outputs=[fps_preview] ) - + submit_btn = gr.Button("Start Navigation Simulation", variant="primary") - with gr.Column(elem_id="result-panel"): gr.Markdown("### Latest Simulation Result") @@ -490,86 +469,72 @@ def cleanup_session(request: gr.Request): autoplay=True, # streaming=True ) - + with gr.Column() as history_container: gr.Markdown("### History") gr.Markdown("#### History will be reset after refresh") - + history_slots = [] for i in range(10): with gr.Column(visible=False) as slot: with gr.Accordion(visible=False, open=False) as accordion: - video = gr.Video(interactive=False) - detail_md = gr.Markdown() - history_slots.append((slot, accordion, video, detail_md)) - + video = gr.Video(interactive=False) + detail_md = gr.Markdown() + history_slots.append((slot, accordion, video, detail_md)) + with gr.Accordion("View System Log (DEV ONLY)", open=False): logs_display = gr.Markdown() refresh_logs_btn = gr.Button("Refresh Log", variant="secondary") - - refresh_logs_btn.click( - update_log_display, - outputs=logs_display - ) + + refresh_logs_btn.click(update_log_display, outputs=logs_display) gr.Examples( examples=[ - ["scene_1", "Exit the bedroom and turn left. Walk straight passing the gray couch and stop near the rug.", "episode_0"], + [ + "scene_1", + "Exit the bedroom and turn left. 
Walk straight passing the gray couch and stop near the rug.", + "episode_0", + ], ["scene_2", "Go from reception to conference room passing the water cooler.", "episode_1"], ["scene_3", "From the classroom, go to the library via the main hall.", "episode_2"], - ["scene_4", "From emergency room to pharmacy passing nurse station.", "episode_3"] + ["scene_4", "From emergency room to pharmacy passing nurse station.", "episode_3"], ], inputs=[scene_dropdown, prompt_input, episode_dropdown], - label="Navigation Task Example" + label="Navigation Task Example", ) - + submit_btn.click( fn=run_simulation, inputs=[scene_dropdown, prompt_input, episode_dropdown, history_state], outputs=[video_output, history_state], queue=True, - api_name="run_simulation" + api_name="run_simulation", ).then( fn=update_history_display, inputs=history_state, outputs=[comp for slot in history_slots for comp in slot], - queue=True + queue=True, ).then( fn=update_log_display, outputs=logs_display, ) - - demo.load( - fn=lambda: update_scene_display("scene_1"), - outputs=[scene_description, scene_preview] - ).then( - fn=update_log_display, - outputs=logs_display - ) - demo.load( - fn=lambda: update_episode_display("scene_1", "episode_1"), - outputs=[fps_preview] + demo.load(fn=lambda: update_scene_display("scene_1"), outputs=[scene_description, scene_preview]).then( + fn=update_log_display, outputs=logs_display ) + demo.load(fn=lambda: update_episode_display("scene_1", "episode_1"), outputs=[fps_preview]) def record_access(request: gr.Request): user_ip = request.client.host if request else "unknown" user_agent = request.headers.get("user-agent", "unknown") log_access(user_ip, user_agent) - return update_log_display() + return update_log_display() - demo.load( - fn=record_access, - inputs=None, - outputs=logs_display, - queue=False - ) + demo.load(fn=record_access, inputs=None, outputs=logs_display, queue=False) demo.queue(default_concurrency_limit=8) demo.unload(fn=cleanup_session) if __name__ == "__main__": - demo.launch(server_name="0.0.0.0", server_port=5750, debug=True, share = True, allowed_paths=["/mnt"]) - - + demo.launch(server_name="0.0.0.0", server_port=5750, debug=True, share=True, allowed_paths=["/mnt"]) diff --git a/scripts/eval/vln_gradio_backend.py b/scripts/demo/vln_gradio_backend.py similarity index 80% rename from scripts/eval/vln_gradio_backend.py rename to scripts/demo/vln_gradio_backend.py index 28bdb0b3..b6f89095 100644 --- a/scripts/eval/vln_gradio_backend.py +++ b/scripts/demo/vln_gradio_backend.py @@ -1,33 +1,24 @@ -from concurrent.futures import ThreadPoolExecutor import argparse -import asyncio import base64 -import threading -import subprocess -import multiprocessing -import numpy as np +import json import os -import torch -import uvicorn import sys -import time -import json import uuid -from typing import Dict, Optional -from fastapi import APIRouter, FastAPI, HTTPException, status, BackgroundTasks, Response -from pydantic import BaseModel + # from utils.log_util import logger -import logging from enum import Enum -from internnav.utils.dist import * -from internnav.habitat_extensions.evaluator_single import VLNEvaluator -from internnav.model.basemodel.internvla_n1.internvla_n1 import InternVLAN1ForCausalLM -from transformers import ( - AutoTokenizer, - AutoProcessor, - Qwen2_5_VLForConditionalGeneration, -) +from typing import Dict, Optional +import numpy as np +import torch +import uvicorn +from fastapi import APIRouter, BackgroundTasks, FastAPI, HTTPException, status +from pydantic import 
BaseModel +from transformers import AutoProcessor + +from internnav.model.basemodel.internvla_n1.internvla_n1 import InternVLAN1ForCausalLM +from internnav.projects.habitat_extensions.evaluator_single import VLNEvaluator +from internnav.utils.dist import * PROJECT_ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(PROJECT_ROOT_PATH) @@ -36,14 +27,16 @@ global instruction + class VideoRequest(BaseModel): """ Frontend post json object template """ + user: str task: str job_id: str - data: str + data: str """ data json template @@ -63,21 +56,22 @@ class VideoRequest(BaseModel): } """ + class TaskInfo: def __init__(self, task_id, status, result_path): self.task_id: str = task_id self.status: str = status self.result_path: str = result_path + class TaskStatus(str, Enum): - pending = 'pending', + pending = 'pending' completed = 'completed' failed = 'failed' terminated = 'terminated' - - -class BackendServer: + +class BackendServer: def __init__(self, host: str, port: int): self.host = host self.port = port @@ -93,7 +87,7 @@ def __init__(self, host: str, port: int): def _register_routes(self): route_config = [ ('/video', self.predict, ['POST'], None), - ('/task/{task_id}', self.get_task_status, ['GET'], Dict[str, Optional[str]]) + ('/task/{task_id}', self.get_task_status, ['GET'], Dict[str, Optional[str]]), ] for path, handler, methods, response_model in route_config: @@ -102,22 +96,15 @@ def _register_routes(self): endpoint=handler, methods=methods, response_model=response_model if 'GET' in methods else None, - status_code=status.HTTP_200_OK if 'GET' in methods else None + status_code=status.HTTP_200_OK if 'GET' in methods else None, ) - - async def predict( - self, - request: VideoRequest, - background_tasks: BackgroundTasks - ) -> Dict[str, str]: + + async def predict(self, request: VideoRequest, background_tasks: BackgroundTasks) -> Dict[str, str]: # Safety: allow tasks pending to MAX_TASK_LIMIT # TODO: may need to improve if sum(task.status == "pending" for task in self.tasks.values()) >= self.MAX_TASK_LIMIT: print(f"Failed to START Task: reach to limit") - raise HTTPException( - status_code=429, - detail=f"Failed to start new task: reach to limit" - ) + raise HTTPException(status_code=429, detail=f"Failed to start new task: reach to limit") task_id = str(uuid.uuid4()) path = os.path.join(output_path, task_id) @@ -127,11 +114,11 @@ async def predict( background_tasks.add_task(self._submit_task, task_id, request.data, path) print(f"Start Task: {task_id} for user: {request.user}, task: {request.task}") - + return {"task_id": task_id, "status": "pending"} def _submit_task(self, task_id: str, data: str, path: str): - + print(f"process task: ID={task_id}") print(f"receive data: {data}...") # print only the first 100 characters try: @@ -140,17 +127,18 @@ def _submit_task(self, task_id: str, data: str, path: str): print("=======VLN Eval Task=======") cache_dir = f"/tmp/InternNav/.triton" os.makedirs(cache_dir, exist_ok=True) - os.chmod(cache_dir, 0o777) + os.chmod(cache_dir, 0o777) evaluator.infer_scene_id = int(data_dict["scene_index"]) - 1 evaluator.infer_episode_id = int(data_dict["episode_index"]) - 1 - evaluator.infer_instruction = (data_dict["instruction"]) + evaluator.infer_instruction = data_dict["instruction"] evaluator.output_path = path evaluator.infer_data_ready = True evaluator.run_single_eval() - + except Exception as e: import traceback + print(traceback.format_exc()) self.tasks[task_id].status = "failed" print(f"Task {task_id} failed: {e}") @@ -168,14
+156,8 @@ async def get_task_status(self, task_id: str) -> Dict[str, Optional[str]]: return {"status": 'completed', "result": task.result_path, "video": video_data} - def run(self): - uvicorn.run( - '__main__:server.app', - host=self.host, - port=self.port - ) - + uvicorn.run('__main__:server.app', host=self.host, port=self.port) if __name__ == "__main__": @@ -196,17 +178,13 @@ def run(self): parser.add_argument("--predict_step_nums", type=int, default=32) parser.add_argument("--continuous_traj", action="store_true", default=True) parser.add_argument("--max_new_tokens", type=int, default=1024) - - parser.add_argument('--world_size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--rank', default=0, type=int, - help='rank') - parser.add_argument('--gpu', default=0, type=int, - help='gpu') + + parser.add_argument('--world_size', default=1, type=int, help='number of distributed processes') + parser.add_argument('--rank', default=0, type=int, help='rank') + parser.add_argument('--gpu', default=0, type=int, help='gpu') parser.add_argument('--port', default='2443') parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training') - parser.add_argument('--device', default='cuda', - help='device to use for training / testing') + parser.add_argument('--device', default='cuda', help='device to use for training / testing') args = parser.parse_args() init_distributed_mode(args) local_rank = args.local_rank @@ -217,8 +195,7 @@ def run(self): device = torch.device(f"cuda:{local_rank}") model = InternVLAN1ForCausalLM.from_pretrained( - args.model_path, torch_dtype=torch.bfloat16, - attn_implementation="flash_attention_2", device_map={"": device} + args.model_path, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", device_map={"": device} ) model.eval() world_size = get_world_size() @@ -230,11 +207,9 @@ def run(self): model=model, processor=processor, epoch=0, - args=args + args=args, ) # evaluator.eval_action(0) server = BackendServer(host="0.0.0.0", port=8001) server.run() - - diff --git a/scripts/eval/eval_dual_system.sh b/scripts/eval/bash/eval_dual_system.sh similarity index 93% rename from scripts/eval/eval_dual_system.sh rename to scripts/eval/bash/eval_dual_system.sh index 2bc91a13..ef4be1eb 100755 --- a/scripts/eval/eval_dual_system.sh +++ b/scripts/eval/bash/eval_dual_system.sh @@ -1,5 +1,5 @@ export MAGNUM_LOG=quiet HABITAT_SIM_LOG=quiet -export NCCL_SOCKET_IFNAME=bond0 +export NCCL_SOCKET_IFNAME=bond0 export NCCL_IB_HCA=mlx5_2,mlx5_3,mlx5_4,mlx5_5 MID_RUN_NAME="InternVLA-N1" @@ -16,4 +16,4 @@ srun -p efm_t \ --model_path checkpoints/${MID_RUN_NAME} \ --predict_step_nums 32 \ --continuous_traj \ - --output_path results/$MID_RUN_NAME/val_unseen_32traj_8steps \ \ No newline at end of file + --output_path results/$MID_RUN_NAME/val_unseen_32traj_8steps \ diff --git a/scripts/eval/eval_system2.sh b/scripts/eval/bash/eval_system2.sh similarity index 85% rename from scripts/eval/eval_system2.sh rename to scripts/eval/bash/eval_system2.sh index 2c4dbd80..bedfbb3b 100755 --- a/scripts/eval/eval_system2.sh +++ b/scripts/eval/bash/eval_system2.sh @@ -1,5 +1,5 @@ export MAGNUM_LOG=quiet HABITAT_SIM_LOG=quiet -export NCCL_SOCKET_IFNAME=bond0 +export NCCL_SOCKET_IFNAME=bond0 export NCCL_IB_HCA=mlx5_2,mlx5_3,mlx5_4,mlx5_5 @@ -16,4 +16,4 @@ srun -p efm_t \ python scripts/eval/eval_habitat.py \ --model_path /path/to/${MID_RUN_NAME} \ --mode system2 \ - --output_path results/$MID_RUN_NAME/val_unseen \ \ No newline at end 
diff --git a/scripts/eval/start_aliyun_dlc.sh b/scripts/eval/bash/start_aliyun_dlc.sh
similarity index 100%
rename from scripts/eval/start_aliyun_dlc.sh
rename to scripts/eval/bash/start_aliyun_dlc.sh
diff --git a/scripts/eval/start_eval.sh b/scripts/eval/bash/start_eval.sh
similarity index 99%
rename from scripts/eval/start_eval.sh
rename to scripts/eval/bash/start_eval.sh
index 478c3bf9..1f472a8a 100755
--- a/scripts/eval/start_eval.sh
+++ b/scripts/eval/bash/start_eval.sh
@@ -112,4 +112,4 @@ while true; do
             fi
         fi
     fi
-done
\ No newline at end of file
+done
diff --git a/scripts/eval/start_eval_iros.sh b/scripts/eval/bash/start_eval_iros.sh
similarity index 99%
rename from scripts/eval/start_eval_iros.sh
rename to scripts/eval/bash/start_eval_iros.sh
index 0615c105..12d819dd 100644
--- a/scripts/eval/start_eval_iros.sh
+++ b/scripts/eval/bash/start_eval_iros.sh
@@ -105,4 +105,4 @@ if [ -n "$processes" ]; then
         kill -9 $pid
         echo "kill: $pid"
     done
-fi
\ No newline at end of file
+fi
diff --git a/scripts/eval/eval_habitat.py b/scripts/eval/eval_habitat.py
index b125b873..46d54370 100644
--- a/scripts/eval/eval_habitat.py
+++ b/scripts/eval/eval_habitat.py
@@ -1,21 +1,15 @@
 import argparse
-import os
 import json
+import os
+import numpy as np
 import torch
-from transformers import (
-    AutoTokenizer,
-    AutoProcessor,
-    Qwen2_5_VLForConditionalGeneration,
-)
-
-import habitat
+from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
-import numpy as np
 from internnav.model.basemodel.internvla_n1.internvla_n1 import InternVLAN1ForCausalLM
+from internnav.projects.habitat_extensions.evaluator import VLNEvaluator
 from internnav.utils.dist import *
-from internnav.habitat_extensions.evaluator import VLNEvaluator
-from internnav.habitat_extensions import measures
+

 def parse_args():
@@ -25,7 +19,7 @@ def parse_args():
     parser.add_argument("--model_path", type=str, default="")
     parser.add_argument("--habitat_config_path", type=str, default='scripts/eval/configs/vln_r2r.yaml')
     parser.add_argument("--eval_split", type=str, default='val_unseen')
-    parser.add_argument("--output_path", type=str, default='./logs/habitat/test') #!
+    parser.add_argument("--output_path", type=str, default='./logs/habitat/test')  #!
parser.add_argument("--num_future_steps", type=int, default=4) parser.add_argument("--num_frames", type=int, default=32) parser.add_argument("--save_video", action="store_true", default=False) @@ -35,50 +29,50 @@ def parse_args(): parser.add_argument("--predict_step_nums", type=int, default=16) parser.add_argument("--continuous_traj", action="store_true", default=False) parser.add_argument("--max_new_tokens", type=int, default=1024) - - parser.add_argument('--world_size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--rank', default=0, type=int, - help='rank') - parser.add_argument('--gpu', default=0, type=int, - help='gpu') + + parser.add_argument('--world_size', default=1, type=int, help='number of distributed processes') + parser.add_argument('--rank', default=0, type=int, help='rank') + parser.add_argument('--gpu', default=0, type=int, help='gpu') parser.add_argument('--port', default='2333') parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training') - parser.add_argument('--device', default='cuda', - help='device to use for training / testing') - + parser.add_argument('--device', default='cuda', help='device to use for training / testing') + return parser.parse_args() def main(): args = parse_args() - + init_distributed_mode(args) local_rank = args.local_rank np.random.seed(local_rank) - - #* 1. Load model and tokenizer. Currently, we support two modes: dual_system and system2 in Habitat. + + # * 1. Load model and tokenizer. Currently, we support two modes: dual_system and system2 in Habitat. processor = AutoProcessor.from_pretrained(args.model_path) processor.tokenizer.padding_side = 'left' - + device = torch.device(f"cuda:{local_rank}") if args.mode == 'dual_system': model = InternVLAN1ForCausalLM.from_pretrained( - args.model_path, torch_dtype=torch.bfloat16, - attn_implementation="flash_attention_2", device_map={"": device} + args.model_path, + torch_dtype=torch.bfloat16, + attn_implementation="flash_attention_2", + device_map={"": device}, ) elif args.mode == 'system2': model = Qwen2_5_VLForConditionalGeneration.from_pretrained( - args.model_path, torch_dtype=torch.bfloat16, - attn_implementation="flash_attention_2", device_map={"": device} + args.model_path, + torch_dtype=torch.bfloat16, + attn_implementation="flash_attention_2", + device_map={"": device}, ) else: raise ValueError(f"Invalid mode: {args.mode}") model.eval() world_size = get_world_size() - - #* 2. initialize evaluator + + # * 2. initialize evaluator evaluator = VLNEvaluator( config_path=args.habitat_config_path, split=args.eval_split, @@ -87,11 +81,11 @@ def main(): model=model, processor=processor, epoch=0, - args=args + args=args, ) - - #* 3. do eval - sucs, spls, oss, nes, ep_num = evaluator.eval_action(idx=get_rank()) + + # * 3. 
+    sucs, spls, oss, nes, ep_num = evaluator.eval_action(idx=get_rank())

     ep_num_all = [torch.zeros_like(ep_num) for _ in range(world_size)]
     # import ipdb; ipdb.set_trace()
@@ -105,24 +99,24 @@ def main():
     dist.all_gather(spls_all, spls)
     dist.all_gather(oss_all, oss)
     dist.all_gather(nes_all, nes)
-
+
     sucs_all = torch.cat(sucs_all, dim=0)
     spls_all = torch.cat(spls_all, dim=0)
     oss_all = torch.cat(oss_all, dim=0)
     nes_all = torch.cat(nes_all, dim=0)
     result_all = {
-        "sucs_all": (sum(sucs_all)/len(sucs_all)).item(),
-        "spls_all": (sum(spls_all)/len(spls_all)).item(),
-        "oss_all": (sum(oss_all)/len(oss_all)).item(),
-        "nes_all": (sum(nes_all)/len(nes_all)).item(),
-        'length': len(sucs_all)
-    }
+        "sucs_all": (sum(sucs_all) / len(sucs_all)).item(),
+        "spls_all": (sum(spls_all) / len(spls_all)).item(),
+        "oss_all": (sum(oss_all) / len(oss_all)).item(),
+        "nes_all": (sum(nes_all) / len(nes_all)).item(),
+        'length': len(sucs_all),
+    }
     print(result_all)
     if get_rank() == 0:
         with open(os.path.join(args.output_path, f'result.json'), 'a') as f:
             f.write(json.dumps(result_all))
-
-
+
+
 if __name__ == '__main__':
     main()
diff --git a/scripts/eval/start_server.py b/scripts/eval/start_server.py
new file mode 100644
index 00000000..eb001127
--- /dev/null
+++ b/scripts/eval/start_server.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+import sys
+
+sys.path.append('.')
+
+import argparse
+import glob
+import importlib
+import importlib.util
+import os
+import sys
+
+from internnav.utils.comm_utils.server import AgentServer
+
+
+# import all agents to register them
+def auto_register_agents(agent_dir: str):
+    # Get all Python files in the agents directory
+    agent_modules = glob.glob(os.path.join(agent_dir, '*.py'))
+
+    # Import each module to trigger the registration
+    for module in agent_modules:
+        if not module.endswith('__init__.py'):  # Avoid importing __init__.py itself
+            module_name = os.path.basename(module)[:-3]  # Remove the .py extension
+            importlib.import_module(
+                f'internnav_baselines.agents.{module_name}'
+            )  # Replace 'agents' with your module's package
+
+
+def load_eval_cfg(config_path, attr_name='eval_cfg'):
+    spec = importlib.util.spec_from_file_location("eval_config_module", config_path)
+    config_module = importlib.util.module_from_spec(spec)
+    sys.modules["eval_config_module"] = config_module
+    spec.loader.exec_module(config_module)
+    return getattr(config_module, attr_name)
+
+
+if __name__ == '__main__':
+    print("Starting Agent Server...")
+
+    print("Registering agents...")
+    auto_register_agents('internnav_baselines/agents')
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--host', type=str, default='localhost')
+    parser.add_argument(
+        '--config',
+        type=str,
+        default=None,
+        help='eval config file path, e.g. scripts/eval/configs/h1_cma_cfg.py',
+    )
+    parser.add_argument('--port', type=int, default=8087)
+    parser.add_argument('--reload', action='store_true')
+    args = parser.parse_args()
+    if args.config:
+        eval_cfg = load_eval_cfg(args.config)
+        args.port = eval_cfg.agent.server_port
+    else:
+        print(f"Warning: No config file provided, using port {args.port}")
+
+    server = AgentServer(args.host, args.port)
+    server.run()
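The `--config` option of `start_server.py` points at a Python module exposing a module-level `eval_cfg` object whose `agent.server_port` attribute overrides `--port` (see `load_eval_cfg` and the `if args.config:` branch). Below is a minimal hypothetical sketch of such a config module under that assumption; the dataclass names are illustrative stand-ins, not the repo's real config types:

```python
# Hypothetical eval config module, e.g. a file like scripts/eval/configs/my_agent_cfg.py.
# start_server.py only requires a module-level `eval_cfg` with `agent.server_port`.
from dataclasses import dataclass, field


@dataclass
class AgentCfg:
    server_port: int = 8087  # port read by start_server.py via eval_cfg.agent.server_port


@dataclass
class EvalCfg:
    agent: AgentCfg = field(default_factory=AgentCfg)


eval_cfg = EvalCfg()
```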
diff --git a/challenge/README.md b/scripts/iros_challenge/README.md
similarity index 100%
rename from challenge/README.md
rename to scripts/iros_challenge/README.md
diff --git a/challenge/demo.gif b/scripts/iros_challenge/demo.gif
similarity index 100%
rename from challenge/demo.gif
rename to scripts/iros_challenge/demo.gif
diff --git a/scripts/eval/eval_iros.py b/scripts/iros_challenge/eval_iros.py
similarity index 100%
rename from scripts/eval/eval_iros.py
rename to scripts/iros_challenge/eval_iros.py
diff --git a/challenge/health_check.sh b/scripts/iros_challenge/health_check.sh
similarity index 99%
rename from challenge/health_check.sh
rename to scripts/iros_challenge/health_check.sh
index 63c7c7ce..86c23399 100644
--- a/challenge/health_check.sh
+++ b/scripts/iros_challenge/health_check.sh
@@ -86,4 +86,4 @@ while true; do
             fi
         fi
     fi
-done
\ No newline at end of file
+done
diff --git a/challenge/onsite_competition/README.md b/scripts/iros_challenge/onsite_competition/README.md
similarity index 100%
rename from challenge/onsite_competition/README.md
rename to scripts/iros_challenge/onsite_competition/README.md
diff --git a/challenge/onsite_competition/captures/rs_meta.json b/scripts/iros_challenge/onsite_competition/captures/rs_meta.json
similarity index 100%
rename from challenge/onsite_competition/captures/rs_meta.json
rename to scripts/iros_challenge/onsite_competition/captures/rs_meta.json
diff --git a/challenge/onsite_competition/captures/rs_rgb.jpg b/scripts/iros_challenge/onsite_competition/captures/rs_rgb.jpg
similarity index 100%
rename from challenge/onsite_competition/captures/rs_rgb.jpg
rename to scripts/iros_challenge/onsite_competition/captures/rs_rgb.jpg
diff --git a/challenge/onsite_competition/captures/sim_depth.npy b/scripts/iros_challenge/onsite_competition/captures/sim_depth.npy
similarity index 100%
rename from challenge/onsite_competition/captures/sim_depth.npy
rename to scripts/iros_challenge/onsite_competition/captures/sim_depth.npy
diff --git a/challenge/onsite_competition/captures/sim_rgb.npy b/scripts/iros_challenge/onsite_competition/captures/sim_rgb.npy
similarity index 100%
rename from challenge/onsite_competition/captures/sim_rgb.npy
rename to scripts/iros_challenge/onsite_competition/captures/sim_rgb.npy
diff --git a/challenge/onsite_competition/onsite_competition_rules_en-US.md b/scripts/iros_challenge/onsite_competition/onsite_competition_rules_en-US.md
similarity index 100%
rename from challenge/onsite_competition/onsite_competition_rules_en-US.md
rename to scripts/iros_challenge/onsite_competition/onsite_competition_rules_en-US.md
diff --git a/challenge/onsite_competition/onsite_competition_rules_zh-CN.md b/scripts/iros_challenge/onsite_competition/onsite_competition_rules_zh-CN.md
similarity index 100%
rename from challenge/onsite_competition/onsite_competition_rules_zh-CN.md
rename to scripts/iros_challenge/onsite_competition/onsite_competition_rules_zh-CN.md
diff --git a/challenge/onsite_competition/sdk/cam.py b/scripts/iros_challenge/onsite_competition/sdk/cam.py
similarity index 100%
rename from challenge/onsite_competition/sdk/cam.py
rename to scripts/iros_challenge/onsite_competition/sdk/cam.py
diff --git a/challenge/onsite_competition/sdk/control.py b/scripts/iros_challenge/onsite_competition/sdk/control.py
similarity index 100%
rename from challenge/onsite_competition/sdk/control.py
rename to scripts/iros_challenge/onsite_competition/sdk/control.py
diff --git a/challenge/onsite_competition/sdk/main.py b/scripts/iros_challenge/onsite_competition/sdk/main.py
similarity index 100%
rename from challenge/onsite_competition/sdk/main.py
rename to scripts/iros_challenge/onsite_competition/sdk/main.py
diff --git a/challenge/onsite_competition/sdk/real_world_env.py b/scripts/iros_challenge/onsite_competition/sdk/real_world_env.py
similarity index 100%
rename from challenge/onsite_competition/sdk/real_world_env.py
rename to scripts/iros_challenge/onsite_competition/sdk/real_world_env.py
diff --git a/challenge/onsite_competition/sdk/save_obs.py b/scripts/iros_challenge/onsite_competition/sdk/save_obs.py
similarity index 100%
rename from challenge/onsite_competition/sdk/save_obs.py
rename to scripts/iros_challenge/onsite_competition/sdk/save_obs.py
diff --git a/challenge/onsite_competition/sdk/stream.py b/scripts/iros_challenge/onsite_competition/sdk/stream.py
similarity index 100%
rename from challenge/onsite_competition/sdk/stream.py
rename to scripts/iros_challenge/onsite_competition/sdk/stream.py
diff --git a/challenge/onsite_competition/sdk/test_agent.py b/scripts/iros_challenge/onsite_competition/sdk/test_agent.py
similarity index 100%
rename from challenge/onsite_competition/sdk/test_agent.py
rename to scripts/iros_challenge/onsite_competition/sdk/test_agent.py
diff --git a/challenge/onsite_competition/sdk/test_robot.py b/scripts/iros_challenge/onsite_competition/sdk/test_robot.py
similarity index 100%
rename from challenge/onsite_competition/sdk/test_robot.py
rename to scripts/iros_challenge/onsite_competition/sdk/test_robot.py
diff --git a/challenge/output.gif b/scripts/iros_challenge/output.gif
similarity index 100%
rename from challenge/output.gif
rename to scripts/iros_challenge/output.gif
diff --git a/challenge/start_eval_iros.sh b/scripts/iros_challenge/start_eval_iros.sh
similarity index 100%
rename from challenge/start_eval_iros.sh
rename to scripts/iros_challenge/start_eval_iros.sh
diff --git a/scripts/eval/inference_only_demo.ipynb b/scripts/notebooks/inference_only_demo.ipynb
similarity index 100%
rename from scripts/eval/inference_only_demo.ipynb
rename to scripts/notebooks/inference_only_demo.ipynb
diff --git a/setup.cfg b/setup.cfg
index c055fee2..4118ae4d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -43,4 +43,4 @@ per-file-ignores=*/__init__.py:F401
 ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117
 max-line-length = 120
 max-complexity = 30
-exclude=_*,.vscode,.git,docs/**,**/test/**,**/lcmtypes/**,internnav/model/**,*.ipynb
+exclude=_*,.vscode,.git,docs/**,**/test/**,**/lcmtypes/**,*.ipynb,scripts/**,internnav/projects/**
diff --git a/tests/function_test/test_server.py b/tests/function_test/test_server.py
index 885bf366..7e4d7b4e 100644
--- a/tests/function_test/test_server.py
+++ b/tests/function_test/test_server.py
@@ -9,7 +9,7 @@ def start_server():
     server_cmd = [
         sys.executable,
-        "internnav/utils/comm_utils/server.py",
+        "scripts/eval/start_server.py",
     ]
     proc = subprocess.Popen(