Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 43 additions & 31 deletions internnav/dataset/navdp_dataset_lerobot.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(
image_size=224,
scene_data_scale=1.0,
trajectory_data_scale=1.0,
pixel_channel=7,
debug=False,
preload=False,
random_digit=False,
Expand All @@ -61,13 +62,16 @@ def __init__(
self.trajectory_afford_path = []
self.random_digit = random_digit
self.prior_sample = prior_sample
self.pixel_channel = pixel_channel
self.item_cnt = 0
self.batch_size = batch_size
self.batch_time_sum = 0.0
self._last_time = None

if preload is False:
for group_dir in self.dataset_dirs: # gibson_zed, 3dfront ...
if 'fixed' in group_dir:
continue
all_scene_dirs = np.array([p for p in os.listdir(os.path.join(root_dirs, group_dir))])
select_scene_dirs = all_scene_dirs[
np.arange(0, all_scene_dirs.shape[0], 1 / self.scene_scale_size).astype(np.int32)
Expand All @@ -79,6 +83,9 @@ def __init__(
]
for traj_dir in tqdm(select_traj_dirs):
entire_task_dir = os.path.join(root_dirs, group_dir, scene_dir, traj_dir)
video_dir = os.path.join(entire_task_dir, "videos/")
if not os.path.exists(video_dir):
continue
rgb_dir = os.path.join(entire_task_dir, "videos/chunk-000/observation.images.rgb/")
depth_dir = os.path.join(entire_task_dir, "videos/chunk-000/observation.images.depth/")
data_path = os.path.join(
Expand Down Expand Up @@ -114,11 +121,11 @@ def __init__(
json.dump(save_dict, f, indent=4)
else:
load_dict = json.load(open(preload_path, 'r'))
self.trajectory_dirs = load_dict['trajectory_dirs'] * 50
self.trajectory_data_dir = load_dict['trajectory_data_dir'] * 50
self.trajectory_rgb_path = load_dict['trajectory_rgb_path'] * 50
self.trajectory_depth_path = load_dict['trajectory_depth_path'] * 50
self.trajectory_afford_path = load_dict['trajectory_afford_path'] * 50
self.trajectory_dirs = load_dict['trajectory_dirs'] #* 50
self.trajectory_data_dir = load_dict['trajectory_data_dir'] #* 50
self.trajectory_rgb_path = load_dict['trajectory_rgb_path'] #* 50
self.trajectory_depth_path = load_dict['trajectory_depth_path']#* 50
self.trajectory_afford_path = load_dict['trajectory_afford_path'] #* 50

def __len__(self):
return len(self.trajectory_dirs)
Expand Down Expand Up @@ -190,23 +197,26 @@ def process_path_points(self, index):
return np.array(trajectory_path.points), trajectory_path

def process_obstacle_points(self, index, path_points):
trajectory_pcd = self.load_pointcloud(self.trajectory_afford_path[index])
trajectory_color = np.array(trajectory_pcd.colors)
trajectory_points = np.array(trajectory_pcd.points)
color_distance = np.abs(trajectory_color - np.array([0, 0, 0.5])).sum(axis=-1) # the obstacles are save in blue
path_lower_bound = path_points.min(axis=0)
path_upper_bound = path_points.max(axis=0)
condition_x = (trajectory_points[:, 0] >= path_lower_bound[0] - 2.0) & (
trajectory_points[:, 0] <= path_upper_bound[0] + 2.0
)
condition_y = (trajectory_points[:, 1] >= path_lower_bound[1] - 2.0) & (
trajectory_points[:, 1] <= path_upper_bound[1] + 2.0
)
select_index = np.where((color_distance < 0.05) & condition_x & condition_y)[0]
trajectory_obstacle = o3d.geometry.PointCloud()
trajectory_obstacle.points = o3d.utility.Vector3dVector(np.asarray(trajectory_pcd.points)[select_index])
trajectory_obstacle.colors = o3d.utility.Vector3dVector(np.asarray(trajectory_pcd.colors)[select_index])
return np.array(trajectory_obstacle.points), trajectory_obstacle
try:
trajectory_pcd = self.load_pointcloud(self.trajectory_afford_path[index])
trajectory_color = np.array(trajectory_pcd.colors)
trajectory_points = np.array(trajectory_pcd.points)
color_distance = np.abs(trajectory_color - np.array([0, 0, 0.5])).sum(axis=-1) # the obstacles are save in blue
path_lower_bound = path_points.min(axis=0)
path_upper_bound = path_points.max(axis=0)
condition_x = (trajectory_points[:, 0] >= path_lower_bound[0] - 2.0) & (
trajectory_points[:, 0] <= path_upper_bound[0] + 2.0
)
condition_y = (trajectory_points[:, 1] >= path_lower_bound[1] - 2.0) & (
trajectory_points[:, 1] <= path_upper_bound[1] + 2.0
)
select_index = np.where((color_distance < 0.05) & condition_x & condition_y)[0]
trajectory_obstacle = o3d.geometry.PointCloud()
trajectory_obstacle.points = o3d.utility.Vector3dVector(np.asarray(trajectory_pcd.points)[select_index])
trajectory_obstacle.colors = o3d.utility.Vector3dVector(np.asarray(trajectory_pcd.colors)[select_index])
return np.array(trajectory_obstacle.points), trajectory_obstacle
except:
return np.zeros((0,3)), None

def process_memory(self, rgb_paths, depth_paths, start_step, memory_digit=1):
memory_index = np.arange(start_step - (self.memory_size - 1) * memory_digit, start_step + 1, memory_digit)
Expand Down Expand Up @@ -509,7 +519,8 @@ def __getitem__(self, index):
camera_intrinsic,
trajectory_base_extrinsic,
)
pixel_goal = np.concatenate((pixel_goal, memory_images[-1]), axis=-1)
if self.pixel_channel == 7:
pixel_goal = np.concatenate((pixel_goal, memory_images[-1]), axis=-1)

pred_actions = (pred_actions[1:] - pred_actions[:-1]) * 4.0
augment_actions = (augment_actions[1:] - augment_actions[:-1]) * 4.0
Expand Down Expand Up @@ -545,7 +556,8 @@ def __getitem__(self, index):
augment_critic,
float(pixel_flag),
)




def navdp_collate_fn(batch):

Expand All @@ -566,17 +578,17 @@ def navdp_collate_fn(batch):
if __name__ == "__main__":
os.makedirs("./navdp_dataset_test/", exist_ok=True)
dataset = NavDP_Base_Datset(
"/shared/smartbot_new/liuyu/vln-n1-minival/",
"./navdp_dataset_test/dataset_lerobot.json",
"/shared/smartbot_new/liuyu/vln-n1",
"/shared/smartbot_new/caiwenzhe/InternNav/internnav/dataset/navdp_dataset_test/dataset_lerobot_balanced.json",
8,
24,
224,
trajectory_data_scale=0.1,
scene_data_scale=0.1,
preload=False,
trajectory_data_scale=1.0,
scene_data_scale=1.0,
preload=True,
)

for i in range(10):
from tqdm import tqdm
for i in tqdm(range(dataset.__len__())):
(
point_goal,
image_goal,
Expand Down
19 changes: 11 additions & 8 deletions internnav/model/basemodel/navdp/navdp_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
elif pretrained_model_name_or_path is None or len(pretrained_model_name_or_path) == 0:
pass
else:
incompatible_keys, _ = model.load_state_dict(
torch.load(pretrained_model_name_or_path)['state_dict'], strict=False
)
incompatible_keys, _ = model.load_state_dict(torch.load(pretrained_model_name_or_path), strict=False)
if len(incompatible_keys) > 0:
print(f'Incompatible keys: {incompatible_keys}')

Expand All @@ -66,13 +64,12 @@ def __init__(self, config: NavDPModelConfig):
self.model_config = ModelCfg(**config.model_cfg['model'])
else:
self.model_config = config

self.config.model_cfg['il']

self._device = torch.device(f"cuda:{config.model_cfg['local_rank']}")
self.image_size = self.config.model_cfg['il']['image_size']
self.memory_size = self.config.model_cfg['il']['memory_size']
self.predict_size = self.config.model_cfg['il']['predict_size']
self.pixel_channel = self.config.model_cfg['il']['pixel_channel']
self.temporal_depth = self.config.model_cfg['il']['temporal_depth']
self.attention_heads = self.config.model_cfg['il']['heads']
self.input_channels = self.config.model_cfg['il']['channels']
Expand All @@ -83,14 +80,16 @@ def __init__(self, config: NavDPModelConfig):
self.rgbd_encoder = NavDP_RGBD_Backbone(
self.image_size, self.token_dim, memory_size=self.memory_size, finetune=self.finetune, device=self._device
)
self.pixel_encoder = NavDP_PixelGoal_Backbone(self.image_size, self.token_dim, device=self._device)
self.pixel_encoder = NavDP_PixelGoal_Backbone(
self.image_size, self.token_dim, pixel_channel=self.pixel_channel, device=self._device
)
self.image_encoder = NavDP_ImageGoal_Backbone(self.image_size, self.token_dim, device=self._device)
self.point_encoder = nn.Linear(3, self.token_dim)

if not self.finetune:
for p in self.rgbd_encoder.parameters():
for p in self.rgbd_encoder.rgb_model.parameters():
p.requires_grad = False
self.rgbd_encoder.eval()
self.rgbd_encoder.rgb_model.eval()

decoder_layer = nn.TransformerDecoderLayer(
d_model=self.token_dim,
Expand Down Expand Up @@ -349,3 +348,7 @@ def predict_nogoal_batch_action_vel(self, input_images, input_depths, sample_num
negative_trajectory = torch.cumsum(naction / 4.0, dim=1)[(critic_values).argsort()[0:8]]
positive_trajectory = torch.cumsum(naction / 4.0, dim=1)[(-critic_values).argsort()[0:8]]
return negative_trajectory, positive_trajectory


# if __name__ == "__main__":
# policy = NavDPNet(config=)
4 changes: 2 additions & 2 deletions internnav/model/encoder/navdp_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ def _get_device(self):


class NavDP_PixelGoal_Backbone(nn.Module):
def __init__(self, image_size=224, embed_size=512, device='cuda:0'):
def __init__(self, image_size=224, embed_size=512, pixel_channel=7, device='cuda:0'):
super().__init__()
if device is None:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Expand All @@ -392,7 +392,7 @@ def __init__(self, image_size=224, embed_size=512, device='cuda:0'):
self.pixelgoal_encoder = DepthAnythingV2(**model_configs['vits'])
self.pixelgoal_encoder = self.pixelgoal_encoder.pretrained.float()
self.pixelgoal_encoder.patch_embed.proj = nn.Conv2d(
in_channels=7,
in_channels=pixel_channel,
out_channels=self.pixelgoal_encoder.patch_embed.proj.out_channels,
kernel_size=self.pixelgoal_encoder.patch_embed.proj.kernel_size,
stride=self.pixelgoal_encoder.patch_embed.proj.stride,
Expand Down
9 changes: 5 additions & 4 deletions scripts/train/configs/navdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
inflection_weight_coef=3.2,
save_interval_epochs=5,
save_filter_frozen_weights=False,
load_from_ckpt=False,
ckpt_to_load='',
load_from_ckpt=True,
ckpt_to_load='/shared/smartbot_new/caiwenzhe/InternNav/checkpoints/cross-waic-final4-125.ckpt',
lmdb_map_size=1e12,
dataset_r2r_root_dir='data/vln_pe/raw_data/r2r',
dataset_3dgs_root_dir='',
Expand All @@ -48,9 +48,10 @@
lerobot_features_dir='data/vln_pe/traj_data/r2r',
camera_name='pano_camera_0',
report_to='tensorboard', # wandb, tensorboard, none
dataset_navdp='data/datasets/navdp_dataset_lerobot.json',
root_dir='data/datasets/InternData-N1/vln_n1/traj_data',
dataset_navdp='./navdp_dataset_lerobot.json',
root_dir='/shared/smartbot_new/liuyu/vln-n1-minival/',
image_size=224,
pixel_channel=4,
scene_scale=1.0,
preload=False,
random_digit=False,
Expand Down
Loading
Loading