From 62b248d169e8bdb9b6dfa1aa91425e2e68b95daf Mon Sep 17 00:00:00 2001
From: AyushP
Date: Fri, 2 Dec 2022 20:06:10 -0500
Subject: [PATCH 1/6] Get working code

---
 monodepth2/datasets/mono_dataset.py  |  2 +-
 monodepth2/networks/depth_decoder.py |  4 ++--
 monodepth2/networks/pose_cnn.py      |  6 +++---
 monodepth2/networks/pose_decoder.py  | 29 ++++++++++++++++++++++++----
 monodepth2/trainer.py                | 20 +++++++++++++++----
 5 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/monodepth2/datasets/mono_dataset.py b/monodepth2/datasets/mono_dataset.py
index a381934..fc7eaba 100644
--- a/monodepth2/datasets/mono_dataset.py
+++ b/monodepth2/datasets/mono_dataset.py
@@ -173,7 +173,7 @@ def __getitem__(self, index):
             inputs[("inv_K", scale)] = torch.from_numpy(inv_K)
 
         if do_color_aug:
-            color_aug = transforms.ColorJitter.get_params(
+            color_aug = transforms.ColorJitter(
                 self.brightness, self.contrast, self.saturation, self.hue)
         else:
             color_aug = (lambda x: x)
diff --git a/monodepth2/networks/depth_decoder.py b/monodepth2/networks/depth_decoder.py
index 786899a..3a98591 100644
--- a/monodepth2/networks/depth_decoder.py
+++ b/monodepth2/networks/depth_decoder.py
@@ -61,7 +61,7 @@ def forward(self, input_features):
             x = self.convs[("upconv", i, 1)](x)
             if i in self.scales:
                 outs = self.sigmoid(self.convs[("dispconv", i)](x))
-                self.outputs[("disp", i)] = outs[:, 0, :, :]
-                self.outputs[("disp-sigma", i)] = outs[:, 1, :, :]
+                self.outputs[("disp", i)] = torch.unsqueeze(outs[:, 0, :, :], axis=1)
+                self.outputs[("disp-sigma", i)] = torch.unsqueeze(outs[:, 1, :, :], axis=1)
 
         return self.outputs
diff --git a/monodepth2/networks/pose_cnn.py b/monodepth2/networks/pose_cnn.py
index a8e4dda..6f6c6d7 100644
--- a/monodepth2/networks/pose_cnn.py
+++ b/monodepth2/networks/pose_cnn.py
@@ -31,11 +31,11 @@ def __init__(self, num_input_frames):
 
         self.num_convs = len(self.convs)
 
-        self.relu = nn.ReLU(True)
+        self.relu = nn.ReLU()
 
-        self.tanh = nn.Tanh(True)
+        self.tanh = nn.Tanh()
 
-        self.softplus = nn.Softplus(True)
+        self.softplus = nn.Softplus()
 
         self.net = nn.ModuleList(list(self.convs.values()))
diff --git a/monodepth2/networks/pose_decoder.py b/monodepth2/networks/pose_decoder.py
index 4b03b60..99f23c8 100644
--- a/monodepth2/networks/pose_decoder.py
+++ b/monodepth2/networks/pose_decoder.py
@@ -28,6 +28,11 @@ def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=Non
         self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1)
         self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1)
 
+        self.a_conv = nn.Conv2d(256, num_frames_to_predict_for, 1)
+        self.b_conv = nn.Conv2d(256, num_frames_to_predict_for, 1)
+
+        self.tanh = nn.Tanh()
+        self.softplus = nn.Softplus()
         self.relu = nn.ReLU()
 
         self.net = nn.ModuleList(list(self.convs.values()))
@@ -39,16 +44,32 @@ def forward(self, input_features):
         cat_features = torch.cat(cat_features, 1)
 
         out = cat_features
+        out_ab = None
        for i in range(3):
             out = self.convs[("pose", i)](out)
             if i != 2:
                 out = self.relu(out)
+            if i==1:
+                out_ab = out
 
-        out = out.mean(3).mean(2)
+        out_pose = out.mean(3).mean(2)
+
+
+        out_a = self.a_conv(out_ab)
+        out_a = self.softplus(out_a)
+        out_a = out_a.mean(3).mean(2)
+        out_b = self.b_conv(out_ab)
+        out_b = self.tanh(out_b)
+        out_b = out_b.mean(3).mean(2)
 
         out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6)
+        out_a = 0.01 * out_a.view(-1, self.num_frames_to_predict_for, 1, 1)
+        out_b = 0.01 * out_b.view(-1, self.num_frames_to_predict_for, 1, 1)
 
-        axisangle = out[..., :3]
-        translation = out[..., 3:]
+        axisangle = out_pose[..., :3]
+        translation = out_pose[..., 3:]
+        a = out_a
+        b = out_b
 
-        return axisangle, translation
+        return axisangle, translation, a, b
diff --git a/monodepth2/trainer.py b/monodepth2/trainer.py
index 90450ac..22a461d 100644
--- a/monodepth2/trainer.py
+++ b/monodepth2/trainer.py
@@ -121,6 +121,8 @@ def __init__(self, options):
         self.train_loader = DataLoader(
             train_dataset, self.opt.batch_size, True, num_workers=self.opt.num_workers, pin_memory=True, drop_last=True
         )
+
+        # breakpoint()
         val_dataset = self.dataset(
             self.opt.data_path, val_filenames, self.opt.height, self.opt.width, self.opt.frame_ids, 4, is_train=False, img_ext=img_ext
         )
@@ -243,6 +245,7 @@ def process_batch(self, inputs):
         if self.use_pose_net:
             outputs.update(self.predict_poses(inputs, features))
 
+        # breakpoint()
         self.generate_images_pred(inputs, outputs)
         losses = self.compute_losses(inputs, outputs)
 
@@ -323,10 +326,10 @@ def val(self):
         """
         self.set_eval()
         try:
-            inputs = self.val_iter.next()
+            inputs = next(self.val_iter)
         except StopIteration:
             self.val_iter = iter(self.val_loader)
-            inputs = self.val_iter.next()
+            inputs = next(self.val_iter)
 
         with torch.no_grad():
             outputs, losses = self.process_batch(inputs)
@@ -345,10 +348,13 @@ def generate_images_pred(self, inputs, outputs):
         """
         for scale in self.opt.scales:
             disp = outputs[("disp", scale)]
+            sigma = outputs[("disp-sigma",scale)]
             if self.opt.v1_multiscale:
                 source_scale = scale
             else:
                 disp = F.interpolate(disp, [self.opt.height, self.opt.width], mode="bilinear", align_corners=False)
+                disp_sigma = F.interpolate(sigma, [self.opt.height, self.opt.width], mode="bilinear", align_corners=False)
+                outputs[("disp-sigma",scale)] = disp_sigma
                 source_scale = 0
 
             _, depth = disp_to_depth(disp, self.opt.min_depth, self.opt.max_depth)
@@ -394,6 +400,8 @@ def compute_reprojection_loss(self, pred, target, sigma):
         if self.opt.no_ssim:
             reprojection_loss = l1_loss
         else:
+            # print(sigma.shape, self.ssim(pred, target).shape,pred.shape,target.shape)
+            # print()
             ssim_loss = (self.ssim(pred, target)) / sigma + torch.log(sigma)
             reprojection_loss = 0.85 * ssim_loss + 0.15 * l1_loss
 
@@ -424,6 +432,8 @@ def compute_losses(self, inputs, outputs):
             color = inputs[("color", 0, scale)]
             target = inputs[("color", 0, source_scale)]
 
+            # print(disp.shape,sigma.shape)
+
             for frame_id in self.opt.frame_ids[1:]:
                 pred = outputs[("color", frame_id, scale)]
                 a = outputs[("a", 0, frame_id)].unsqueeze(1)
@@ -440,7 +450,7 @@ def compute_losses(self, inputs, outputs):
                 identity_reprojection_losses = []
                 for frame_id in self.opt.frame_ids[1:]:
                     pred = inputs[("color", frame_id, source_scale)]
-                    identity_reprojection_losses.append(self.compute_reprojection_loss(pred, target))
+                    identity_reprojection_losses.append(self.compute_reprojection_loss(pred, target,sigma))
 
                 identity_reprojection_losses = torch.cat(identity_reprojection_losses, 1)
@@ -487,8 +497,10 @@ def compute_losses(self, inputs, outputs):
             mean_disp = disp.mean(2, True).mean(3, True)
             norm_disp = disp / (mean_disp + 1e-7)
 
+            # breakpoint()
             smooth_loss = get_smooth_loss(norm_disp, color)
-
+            reg_loss = smooth_loss + self.opt.ab_weight * torch.mean(ab_loss)
+
             loss += self.opt.disparity_smoothness * reg_loss / (2 ** scale)
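Note (editorial aside, not part of the patch series): patch 1 makes the depth decoder emit two channels and splits them into a disparity map and a per-pixel sigma map, keeping a singleton channel axis so that the later F.interpolate calls and per-pixel loss arithmetic broadcast correctly. A minimal sketch of that indexing, with illustrative shapes and names:

    import torch

    outs = torch.sigmoid(torch.randn(4, 2, 96, 320))  # (B, 2, H, W) dispconv output
    disp = outs[:, 0, :, :].unsqueeze(1)              # (B, 1, H, W) disparity
    sigma = outs[:, 1, :, :].unsqueeze(1)             # (B, 1, H, W) uncertainty
    assert disp.shape == (4, 1, 96, 320) and sigma.shape == (4, 1, 96, 320)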
From 6689acd38f983f98716e48c79d174147b49bdc34 Mon Sep 17 00:00:00 2001
From: Valmiki Kothare
Date: Sat, 3 Dec 2022 14:04:54 -0500
Subject: [PATCH 2/6] Brightness transformation added to identity reprojection
 loss

---
 monodepth2/trainer.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/monodepth2/trainer.py b/monodepth2/trainer.py
index 22a461d..a0a6d1d 100644
--- a/monodepth2/trainer.py
+++ b/monodepth2/trainer.py
@@ -450,7 +450,10 @@ def compute_losses(self, inputs, outputs):
                 identity_reprojection_losses = []
                 for frame_id in self.opt.frame_ids[1:]:
                     pred = inputs[("color", frame_id, source_scale)]
-                    identity_reprojection_losses.append(self.compute_reprojection_loss(pred, target,sigma))
+                    a = outputs[("a", 0, frame_id)].unsqueeze(1)
+                    b = outputs[("b", 0, frame_id)].unsqueeze(1)
+                    target_frame = target * a + b
+                    identity_reprojection_losses.append(self.compute_reprojection_loss(pred, target_frame, sigma))
 
                 identity_reprojection_losses = torch.cat(identity_reprojection_losses, 1)
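Note (editorial aside, not part of the patch series): patch 2 applies the predicted affine brightness parameters (a, b) to the identity-reprojection target as well, presumably so the static-scene check competes against the same brightness-aligned target as the warped prediction. A sketch of the transform, assuming a and b have shape (B, 1, 1, 1) and broadcast over the image; names are illustrative:

    import torch

    def brightness_aligned_target(target, a, b):
        # a > 0 comes from a softplus head, b in (-1, 1) from a tanh head
        return target * a + b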
From de8a0ec8e0aa569b15b7a91d079cf8a26cce4129 Mon Sep 17 00:00:00 2001
From: AyushP
Date: Sat, 3 Dec 2022 15:20:16 -0500
Subject: [PATCH 3/6] Fix PoseNet and DepthNet integration

1. Removed 0.01 multiplication factor from PoseNet a and b.
2. Reprojection loss was getting scaled twice by sigma, removed the second
   scaling.
3. Transformed sigma while scaling reprojection loss to 10*sigma + 0.1 as
   mentioned here ->
   https://github.com/no-Seaweed/Learning-Deep-Learning-1/blob/master/paper_notes/sfm_learner.md
4. Removed print statements and breakpoints
---
 monodepth2/networks/pose_cnn.py |  4 ++--
 monodepth2/trainer.py           | 16 +++++-----------
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/monodepth2/networks/pose_cnn.py b/monodepth2/networks/pose_cnn.py
index 6f6c6d7..17b89be 100644
--- a/monodepth2/networks/pose_cnn.py
+++ b/monodepth2/networks/pose_cnn.py
@@ -55,8 +55,8 @@ def forward(self, out):
         out_b = out_b.mean(3).mean(2)
 
         out_pose = 0.01 * out_pose.view(-1, self.num_input_frames - 1, 1, 6)
-        out_a = 0.01 * out_a.view(-1, self.num_input_frames - 1, 1, 1)
-        out_b = 0.01 * out_b.view(-1, self.num_input_frames - 1, 1, 1)
+        out_a = out_a.view(-1, self.num_input_frames - 1, 1, 1)
+        out_b = out_b.view(-1, self.num_input_frames - 1, 1, 1)
 
         axisangle = out_pose[..., :3]
         translation = out_pose[..., 3:]
diff --git a/monodepth2/trainer.py b/monodepth2/trainer.py
index 22a461d..0f2f624 100644
--- a/monodepth2/trainer.py
+++ b/monodepth2/trainer.py
@@ -122,7 +122,6 @@ def __init__(self, options):
             train_dataset, self.opt.batch_size, True, num_workers=self.opt.num_workers, pin_memory=True, drop_last=True
         )
 
-        # breakpoint()
         val_dataset = self.dataset(
             self.opt.data_path, val_filenames, self.opt.height, self.opt.width, self.opt.frame_ids, 4, is_train=False, img_ext=img_ext
         )
@@ -245,7 +244,6 @@ def process_batch(self, inputs):
         if self.use_pose_net:
             outputs.update(self.predict_poses(inputs, features))
 
-        # breakpoint()
         self.generate_images_pred(inputs, outputs)
         losses = self.compute_losses(inputs, outputs)
 
@@ -395,19 +393,18 @@ def compute_reprojection_loss(self, pred, target, sigma):
         """Computes reprojection loss between a batch of predicted and target images
         """
         abs_diff = torch.abs(target - pred)
-        l1_loss = abs_diff
+        l1_loss = abs_diff.mean(1, True)
 
         if self.opt.no_ssim:
             reprojection_loss = l1_loss
         else:
-            # print(sigma.shape, self.ssim(pred, target).shape,pred.shape,target.shape)
-            # print()
-            ssim_loss = (self.ssim(pred, target)) / sigma + torch.log(sigma)
+            ssim_loss = (self.ssim(pred, target)).mean(1, True)
             reprojection_loss = 0.85 * ssim_loss + 0.15 * l1_loss
 
-        reprojection_loss = reprojection_loss / sigma + torch.log(sigma)
+        # Reference: https://github.com/no-Seaweed/Learning-Deep-Learning-1/blob/master/paper_notes/sfm_learner.md
+        transformed_sigma = (10 * sigma + 0.1)
+        reprojection_loss = (reprojection_loss / transformed_sigma) + torch.log(transformed_sigma)
 
-        reprojection_loss = reprojection_loss.mean(1, True)
 
         return reprojection_loss
@@ -432,8 +429,6 @@ def compute_losses(self, inputs, outputs):
             color = inputs[("color", 0, scale)]
             target = inputs[("color", 0, source_scale)]
 
-            # print(disp.shape,sigma.shape)
-
             for frame_id in self.opt.frame_ids[1:]:
                 pred = outputs[("color", frame_id, scale)]
                 a = outputs[("a", 0, frame_id)].unsqueeze(1)
@@ -497,7 +492,6 @@ def compute_losses(self, inputs, outputs):
             mean_disp = disp.mean(2, True).mean(3, True)
             norm_disp = disp / (mean_disp + 1e-7)
 
-            # breakpoint()
             smooth_loss = get_smooth_loss(norm_disp, color)
 
             reg_loss = smooth_loss + self.opt.ab_weight * torch.mean(ab_loss)
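Note (editorial aside, not part of the patch series): the loss form patch 3 settles on is the standard heteroscedastic (aleatoric-uncertainty) weighting: divide the photometric error by a transformed sigma and add a log penalty so the network cannot shrink the loss simply by inflating sigma everywhere. A self-contained sketch, assuming sigma is the sigmoid-range map from the depth decoder; names are illustrative:

    import torch

    def uncertainty_weighted(err, sigma):
        # sigma in (0, 1) => transformed_sigma in (0.1, 10.1), safely away from 0
        transformed_sigma = 10 * sigma + 0.1
        return err / transformed_sigma + torch.log(transformed_sigma)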
From a326163305c2a0446c68f7f128c13d3fcde3e7af Mon Sep 17 00:00:00 2001
From: AyushP
Date: Sat, 3 Dec 2022 20:10:47 -0500
Subject: [PATCH 4/6] Add visualization and sigma - 1 loss as a comment

---
 monodepth2/trainer.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/monodepth2/trainer.py b/monodepth2/trainer.py
index 5f6656f..542f6c0 100644
--- a/monodepth2/trainer.py
+++ b/monodepth2/trainer.py
@@ -14,6 +14,8 @@
 import torch.optim as optim
 from torch.utils.data import DataLoader
 from tensorboardX import SummaryWriter
+from torchvision.utils import save_image
+
 
 import json
 
@@ -211,11 +213,11 @@ def run_epoch(self):
                 self.compute_depth_losses(inputs, outputs, losses)
 
             self.log("train", inputs, outputs, losses)
-            self.val()
+            self.val(self.epoch * 10 + batch_idx)
 
             self.step += 1
 
-    def process_batch(self, inputs):
+    def process_batch(self, inputs, batch_idx = -1):
         """Pass a minibatch through the network and generate images and losses
         """
         for key, ipt in inputs.items():
@@ -245,7 +247,7 @@ def process_batch(self, inputs):
             outputs.update(self.predict_poses(inputs, features))
 
         self.generate_images_pred(inputs, outputs)
-        losses = self.compute_losses(inputs, outputs)
+        losses = self.compute_losses(inputs, outputs, batch_idx)
 
         return outputs, losses
 
@@ -319,7 +321,7 @@ def predict_poses(self, inputs, features):
 
         return outputs
 
-    def val(self):
+    def val(self, batch_idx):
         """Validate the model on a single minibatch
         """
         self.set_eval()
@@ -330,7 +332,7 @@ def val(self):
             inputs = next(self.val_iter)
 
         with torch.no_grad():
-            outputs, losses = self.process_batch(inputs)
+            outputs, losses = self.process_batch(inputs, batch_idx)
 
             if "depth_gt" in inputs:
                 self.compute_depth_losses(inputs, outputs, losses)
@@ -402,13 +404,13 @@ def compute_reprojection_loss(self, pred, target, sigma):
             reprojection_loss = 0.85 * ssim_loss + 0.15 * l1_loss
 
         # Reference: https://github.com/no-Seaweed/Learning-Deep-Learning-1/blob/master/paper_notes/sfm_learner.md
-        transformed_sigma = (10 * sigma + 0.1)
+        # transformed_sigma = (10 * sigma + 0.1)
+        transformed_sigma = sigma + 0.001
         reprojection_loss = (reprojection_loss / transformed_sigma) + torch.log(transformed_sigma)
-
         return reprojection_loss
 
-    def compute_losses(self, inputs, outputs):
+    def compute_losses(self, inputs, outputs, batch_idx=-1):
         """Compute the reprojection and smoothness losses for a minibatch
         """
         losses = {}
@@ -434,6 +436,10 @@ def compute_losses(self, inputs, outputs, batch_idx=-1):
                 a = outputs[("a", 0, frame_id)].unsqueeze(1)
                 b = outputs[("b", 0, frame_id)].unsqueeze(1)
                 target_frame = target * a + b
+                if batch_idx != -1:
+                    save_image(target_frame[-1], f'val_images/target_frame_{self.step}.jpeg')
+                    save_image(target[-1], f'val_images/target_{self.step}.jpeg')
+
                 reprojection_losses.append(self.compute_reprojection_loss(pred, target_frame, sigma))
 
                 ab_losses.append((a - 1) ** 2 + b ** 2)
@@ -499,6 +505,7 @@ def compute_losses(self, inputs, outputs, batch_idx=-1):
 
             reg_loss = smooth_loss + self.opt.ab_weight * torch.mean(ab_loss)
 
+            #loss += torch.mean((sigma - 1) ** 2)
             loss += self.opt.disparity_smoothness * reg_loss / (2 ** scale)
 
             total_loss += loss
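Note (editorial aside, not part of the patch series): with sigma coming from a sigmoid head, the patch-4 variant transformed_sigma = sigma + 0.001 lets the divisor approach 0.001, where the log term is about -6.9; on pixels whose error is already near zero the network is then rewarded for driving sigma to zero. That is one plausible reading of why the series later moves sigma to the range [1, inf). A quick magnitude check with an illustrative value:

    import torch

    sigma = torch.tensor([1e-4])      # sigmoid output collapsing toward 0
    print(torch.log(sigma + 0.001))   # ~ -6.8 per pixel: the loss can go very negative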
From 74c53af14aaf55024c53ee08fdcc14319dfb43ed Mon Sep 17 00:00:00 2001
From: AyushP
Date: Mon, 5 Dec 2022 19:59:31 -0500
Subject: [PATCH 5/6] Latest training code

---
 monodepth2/evaluate_depth.py |  2 +-
 monodepth2/test_simple.py    | 12 +++++++++++-
 monodepth2/trainer.py        | 13 ++++++++++---
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/monodepth2/evaluate_depth.py b/monodepth2/evaluate_depth.py
index 7746ef9..5b11b15 100644
--- a/monodepth2/evaluate_depth.py
+++ b/monodepth2/evaluate_depth.py
@@ -163,7 +163,7 @@ def evaluate(opt):
         quit()
 
     gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
-    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1')["data"]
+    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]
 
     print("-> Evaluating")
diff --git a/monodepth2/test_simple.py b/monodepth2/test_simple.py
index d74d63b..063d8e0 100644
--- a/monodepth2/test_simple.py
+++ b/monodepth2/test_simple.py
@@ -131,13 +131,23 @@ def test_simple(args):
             input_image = input_image.to(device)
             features = encoder(input_image)
             outputs = depth_decoder(features)
+            output_name = os.path.splitext(os.path.basename(image_path))[0]
 
             disp = outputs[("disp", 0)]
+
+            dis_sigma = outputs[("disp-sigma", 0)]
+            disp_sigma_resized = torch.nn.functional.interpolate(
+                disp, (original_height, original_width), mode="bilinear", align_corners=False)
+            disp_sigma_im = disp_sigma_resized.detach().cpu().numpy()
+            disp_sigma_im = pil.fromarray((disp_sigma_im[0][0] * 255.0).astype(np.uint8))
+            name_dest_im_sigma = os.path.join(output_directory, "{}_disp_sigma.jpeg".format(output_name))
+            disp_sigma_im.save(name_dest_im_sigma)
+
+
             disp_resized = torch.nn.functional.interpolate(
                 disp, (original_height, original_width), mode="bilinear", align_corners=False)
 
             # Saving numpy file
-            output_name = os.path.splitext(os.path.basename(image_path))[0]
             scaled_disp, depth = disp_to_depth(disp, 0.1, 100)
             if args.pred_metric_depth:
                 name_dest_npy = os.path.join(output_directory, "{}_depth.npy".format(output_name))
diff --git a/monodepth2/trainer.py b/monodepth2/trainer.py
index 542f6c0..77e0d92 100644
--- a/monodepth2/trainer.py
+++ b/monodepth2/trainer.py
@@ -405,8 +405,12 @@ def compute_reprojection_loss(self, pred, target, sigma):
 
         # Reference: https://github.com/no-Seaweed/Learning-Deep-Learning-1/blob/master/paper_notes/sfm_learner.md
         # transformed_sigma = (10 * sigma + 0.1)
-        transformed_sigma = sigma + 0.001
-        reprojection_loss = (reprojection_loss / transformed_sigma) + torch.log(transformed_sigma)
+
+        # Exp 1
+        # transformed_sigma = sigma + 0.001
+        # reprojection_loss = (reprojection_loss / transformed_sigma) + torch.log(transformed_sigma)
+
+        reprojection_loss = (reprojection_loss * sigma)
 
         return reprojection_loss
 
@@ -505,7 +509,10 @@ def compute_losses(self, inputs, outputs, batch_idx=-1):
 
             reg_loss = smooth_loss + self.opt.ab_weight * torch.mean(ab_loss)
 
-            #loss += torch.mean((sigma - 1) ** 2)
+            loss += torch.mean((sigma - 1) ** 2)
+            # categorical_loss = nn.CrossEntropyLoss()
+            # loss += categorical_loss(sigma, torch.ones_like(sigma))
+            # print(sigma.min(), sigma.max(), sigma.mean())
             loss += self.opt.disparity_smoothness * reg_loss / (2 ** scale)
 
             total_loss += loss
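Note (editorial aside, not part of the patch series): patch 5 tries the opposite weighting, multiplying the error by sigma. On its own that is degenerate, since sigma -> 0 zeroes the loss, which is presumably why the same commit enables the (sigma - 1) ** 2 anchor term. A sketch of the combined objective, with illustrative shapes:

    import torch

    err = torch.rand(4, 1, 96, 320)    # per-pixel photometric error
    sigma = torch.rand(4, 1, 96, 320)  # sigmoid-range confidence map
    loss = (err * sigma).mean() + torch.mean((sigma - 1) ** 2)  # anchor resists sigma -> 0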
From 73e35d32b28907bc292d532459200d8a0aa52ba3 Mon Sep 17 00:00:00 2001
From: AyushP
Date: Wed, 7 Dec 2022 23:34:39 -0500
Subject: [PATCH 6/6] Change the range of sigma from 1 to infinity

---
 monodepth2/networks/depth_decoder.py | 7 ++++---
 monodepth2/trainer.py                | 9 +++++----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/monodepth2/networks/depth_decoder.py b/monodepth2/networks/depth_decoder.py
index 3a98591..5c1082c 100644
--- a/monodepth2/networks/depth_decoder.py
+++ b/monodepth2/networks/depth_decoder.py
@@ -46,6 +46,7 @@ def __init__(self, num_ch_enc, scales=range(4), num_output_channels=2, use_skips
 
         self.decoder = nn.ModuleList(list(self.convs.values()))
         self.sigmoid = nn.Sigmoid()
+        self.relu = nn.ReLU()
 
     def forward(self, input_features):
         self.outputs = {}
@@ -60,8 +61,8 @@ def forward(self, input_features):
             x = torch.cat(x, 1)
             x = self.convs[("upconv", i, 1)](x)
             if i in self.scales:
-                outs = self.sigmoid(self.convs[("dispconv", i)](x))
-                self.outputs[("disp", i)] = torch.unsqueeze(outs[:, 0, :, :], axis=1)
-                self.outputs[("disp-sigma", i)] = torch.unsqueeze(outs[:, 1, :, :], axis=1)
+                outs = self.convs[("dispconv", i)](x)
+                self.outputs[("disp", i)] = self.sigmoid(torch.unsqueeze(outs[:, 0, :, :], axis=1))
+                self.outputs[("disp-sigma", i)] = self.relu(torch.unsqueeze(outs[:, 1, :, :], axis=1))
 
         return self.outputs
diff --git a/monodepth2/trainer.py b/monodepth2/trainer.py
index 77e0d92..77a345e 100644
--- a/monodepth2/trainer.py
+++ b/monodepth2/trainer.py
@@ -407,10 +407,10 @@ def compute_reprojection_loss(self, pred, target, sigma):
         # transformed_sigma = (10 * sigma + 0.1)
 
         # Exp 1
-        # transformed_sigma = sigma + 0.001
-        # reprojection_loss = (reprojection_loss / transformed_sigma) + torch.log(transformed_sigma)
+        transformed_sigma = sigma + 1
+        reprojection_loss = (reprojection_loss / transformed_sigma) + torch.log(transformed_sigma)
 
-        reprojection_loss = (reprojection_loss * sigma)
+        # reprojection_loss = (reprojection_loss * sigma)
 
         return reprojection_loss
 
@@ -441,6 +441,7 @@ def compute_losses(self, inputs, outputs, batch_idx=-1):
                 b = outputs[("b", 0, frame_id)].unsqueeze(1)
                 target_frame = target * a + b
                 if batch_idx != -1:
+                    save_image(pred[-1], f'val_images/pred_{self.step}.jpeg')
                     save_image(target_frame[-1], f'val_images/target_frame_{self.step}.jpeg')
                     save_image(target[-1], f'val_images/target_{self.step}.jpeg')
 
@@ -510,7 +510,7 @@ def compute_losses(self, inputs, outputs, batch_idx=-1):
 
             reg_loss = smooth_loss + self.opt.ab_weight * torch.mean(ab_loss)
 
-            loss += torch.mean((sigma - 1) ** 2)
+            # loss += torch.mean((sigma - 1) ** 2)
             # categorical_loss = nn.CrossEntropyLoss()
             # loss += categorical_loss(sigma, torch.ones_like(sigma))
             # print(sigma.min(), sigma.max(), sigma.mean())
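Note (editorial aside, not part of the patch series): with the patch-6 head, sigma = relu(raw) lies in [0, inf) and the divisor sigma + 1 in [1, inf), so torch.log(sigma + 1) >= 0 and the weighted loss can no longer be pushed arbitrarily negative, matching the commit title. A sketch of the resulting per-pixel weighting, with illustrative shapes:

    import torch

    raw = torch.randn(4, 1, 96, 320)        # unbounded second decoder channel
    sigma = torch.relu(raw)                 # [0, inf)
    transformed_sigma = sigma + 1           # [1, inf)
    weight = 1.0 / transformed_sigma        # (0, 1]: confident pixels count more
    penalty = torch.log(transformed_sigma)  # >= 0 everywhere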