diff --git a/README.md b/README.md index 9b8ce959..0b93b634 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ -deeppose +DeepPose ======== +NOTE: This is not an official implementation. The original paper is [DeepPose: Human Pose Estimation via Deep Neural Networks](http://arxiv.org/abs/1312.4659). + # Requirements - [Chainer](https://github.com/pfnet/chainer) (Neural network framework) @@ -36,6 +38,14 @@ This script downloads FLIC-full dataset (http://vision.grasp.upenn.edu/cgi-bin/i ## For FLIC Dataset +Just run: + +``` +nohup python scripts/train.py > AlexNet_flic_LCN_AdaGrad_lr-0.0005.log 2>&1 & +``` + +It is the same as: + ``` nohup python scripts/train.py \ --model models/AlexNet_flic.py \ @@ -46,16 +56,18 @@ nohup python scripts/train.py \ --snapshot 10 \ --datadir data/FLIC-full \ --channel 3 \ ---flip True \ +--flip 1 \ --size 220 \ --crop_pad_inf 1.5 \ --crop_pad_sup 2.0 \ --shift 5 \ ---lcn True \ +--lcn 1 \ --joint_num 7 \ > AlexNet_LCN_AdaGrad_lr-0.0005.log 2>&1 & ``` +`--flip 1` means it performs left-right (LR) flip augmentation, and `--flip 0` does nothing. `--lcn 1` means local (arguably "global") contrast normalization will be applied. + See the help messages with `--help` option for details. 
### GPU memory requirement diff --git a/scripts/train.py b/scripts/train.py index e79a71f9..70d4b80c 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -8,12 +8,10 @@ import logging import time import os -import sys import imp import shutil import numpy as np -from chainer import optimizers, cuda, Variable -import chainer.functions as F +from chainer import optimizers, cuda from transform import Transform import cPickle as pickle from draw_loss import draw_loss_curve @@ -178,12 +176,12 @@ def get_log_msg(stage, epoch, sum_loss, N, args, st): help='model definition file in models dir') parser.add_argument('--gpu', type=int, default=0) parser.add_argument('--epoch', type=int, default=1000) - parser.add_argument('--batchsize', type=int, default=128) + parser.add_argument('--batchsize', type=int, default=32) parser.add_argument('--prefix', type=str, default='AlexNet_flic') parser.add_argument('--snapshot', type=int, default=10) parser.add_argument('--datadir', type=str, default='data/FLIC-full') parser.add_argument('--channel', type=int, default=3) - parser.add_argument('--flip', type=bool, default=True, + parser.add_argument('--flip', type=int, default=1, help='flip left and right for data augmentation') parser.add_argument('--size', type=int, default=220, help='resizing') @@ -191,9 +189,9 @@ def get_log_msg(stage, epoch, sum_loss, N, args, st): help='random number infimum for padding size when cropping') parser.add_argument('--crop_pad_sup', type=float, default=2.0, help='random number supremum for padding size when cropping') - parser.add_argument('--shift', type=int, default=10, + parser.add_argument('--shift', type=int, default=5, help='slide an image when cropping') - parser.add_argument('--lcn', type=bool, default=True, + parser.add_argument('--lcn', type=int, default=1, help='local contrast normalization for data augmentation') parser.add_argument('--joint_num', type=int, default=7) parser.add_argument('--fname_index', type=int, default=0, @@ -220,11 
+218,13 @@ def get_log_msg(stage, epoch, sum_loss, N, args, st): logging.info('# of test data:{}'.format(N_test)) # augmentation setting + _flip = bool(args.flip) + _lcn = bool(args.lcn) trans = Transform(padding=[args.crop_pad_inf, args.crop_pad_sup], - flip=args.flip, + flip=_flip, size=args.size, shift=args.shift, - lcn=args.lcn) + lcn=_lcn) logging.info(time.strftime('%Y-%m-%d_%H-%M-%S')) logging.info('start training...')