61 changes: 61 additions & 0 deletions coordnet_model.py
@@ -0,0 +1,61 @@
import tensorflow as tf
from tensorflow.python.layers import base


class AddCoords(base.Layer):
"""Add coords to a tensor"""
def __init__(self, x_dim=64, y_dim=64, with_r=False):
super(AddCoords, self).__init__()
self.x_dim = x_dim
self.y_dim = y_dim
self.with_r = with_r

def call(self, input_tensor, **kwargs):
"""
input_tensor: (batch, x_dim, y_dim, c)
"""
        batch_size_tensor = tf.shape(input_tensor)[0]
        # build xx_channel[b, i, j] = j (the column index of every pixel) as an outer
        # product of a ones column (batch, x_dim, 1) with a range row (batch, 1, y_dim)
        xx_ones = tf.ones([batch_size_tensor, self.x_dim], dtype=tf.int32)
        xx_ones = tf.expand_dims(xx_ones, -1)
        xx_range = tf.tile(tf.expand_dims(tf.range(self.y_dim), 0),
                           [batch_size_tensor, 1])
        xx_range = tf.expand_dims(xx_range, 1)
        xx_channel = tf.matmul(xx_ones, xx_range)
        xx_channel = tf.expand_dims(xx_channel, -1)
        # build yy_channel[b, i, j] = i (the row index of every pixel) the same way,
        # with the ones row and the range column swapped
        yy_ones = tf.ones([batch_size_tensor, self.y_dim], dtype=tf.int32)
        yy_ones = tf.expand_dims(yy_ones, 1)
        yy_range = tf.tile(tf.expand_dims(tf.range(self.x_dim), 0),
                           [batch_size_tensor, 1])
        yy_range = tf.expand_dims(yy_range, -1)
        yy_channel = tf.matmul(yy_range, yy_ones)
        yy_channel = tf.expand_dims(yy_channel, -1)
        # normalize each channel to [-1, 1]: xx holds column indices (0..y_dim-1),
        # yy holds row indices (0..x_dim-1), so divide by the matching dimension
        xx_channel = tf.cast(xx_channel, 'float32') / (self.y_dim - 1)
        yy_channel = tf.cast(yy_channel, 'float32') / (self.x_dim - 1)
        xx_channel = xx_channel*2 - 1
        yy_channel = yy_channel*2 - 1
ret = tf.concat([input_tensor, xx_channel, yy_channel], axis=-1)
if self.with_r:
rr = tf.sqrt(tf.square(xx_channel) + tf.square(yy_channel))
ret = tf.concat([ret, rr], axis=-1)
return ret


class CoordConv(base.Layer):
"""CoordConv layer as in the paper."""
def __init__(self, x_dim, y_dim, with_r, *args, **kwargs):
super(CoordConv, self).__init__()
self.addcoords = AddCoords(x_dim=x_dim,
y_dim=y_dim,
with_r=with_r)
self.conv = tf.layers.Conv2D(*args, **kwargs)

def call(self, input_tensor, **kwargs):
ret = self.addcoords(input_tensor)
ret = self.conv(ret)
return ret


def coord_conv(x_dim, y_dim, with_r, inputs, *args, **kwargs):
layer = CoordConv(x_dim, y_dim, with_r, *args, **kwargs)
return layer.apply(inputs)
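A minimal usage sketch of the new layer, assuming a single-channel 55x111 workspace image (the shape and conv parameters mirror how dqn_model.py calls coord_conv later in this diff); the placeholder and layer name here are illustrative only:

import tensorflow as tf
from coordnet_model import coord_conv

# assumed single-channel 55x111 workspace image (shape taken from its use elsewhere in this diff)
workspace_image = tf.placeholder(tf.float32, (None, 55, 111, 1), name='workspace_image')
# AddCoords appends the normalized coordinate channels, then the remaining arguments
# (32 filters, kernel size 8, stride 4) are forwarded to tf.layers.Conv2D
features = coord_conv(55, 111, False, workspace_image, 32, 8, 4, padding='same',
                      activation=tf.nn.relu, use_bias=True, name='example_coord_conv')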

37 changes: 21 additions & 16 deletions data/config/reward_config.yml
@@ -1,14 +1,12 @@
general:
Owner comment: are all the below changes justified? especially the oversampling ratios...
epochs: 5000
save_every_epochs: 1
# gpu_usage: 0.1
gpu_usage: 0.8 # with vision
# scenario: 'no_obstacles'
# scenario: 'simple'
# scenario: 'hard'
# scenario: 'vision'
scenario: 'vision_harder'
number_of_unzippers: 10
number_of_unzippers: 1

openrave_rl:
action_step_size: 0.025
@@ -21,24 +19,31 @@ openrave_rl:
truncate_penalty: 0.05

model:
# batch_size: 10240
batch_size: 2000 # with vision
potential_points: [2, 0., 0.075, 3, 0., 0.085, 4, -0.02, 0.05, 4, 0.005, 0.05, 5, 0.005, 0.035, 5, -0.02, 0.035]
consider_goal_pose: True
goal_configuration_distance_sensitivity: 1.23

reward:
train:
epochs: 20
save_every_epochs: 1
# batch_size: 10240
batch_size: 300 # with vision
test_every_batches: 1000
initial_learn_rate: 0.001
# initial_learn_rate: 0.01
decrease_learn_rate_after: 2000
# decrease_learn_rate_after: 10000
# learn_rate_decrease_rate: 0.8
learn_rate_decrease_rate: 1.0
decrease_learn_rate_after: 10000
learn_rate_decrease_rate: 0.5
gradient_limit: 5.0
# gradient_limit: 0.0
layers: [100, 100, 100, 100]
# layers: [200, 200, 200, 200, 200]
l2_regularization_coefficient: 0.0001
cross_entropy_coefficient: 1.0
oversample_goal: 0.2
oversample_collision: 0.38

network:
layers: [300, 300, 300, 200, 100, 100]
activation: 'elu'
oversample_goal: 1.0
oversample_collision: 1.0
use_coordnet: true
image_network: 'resnet'
train_vae: true
vae_latent_dim: 150
resnet_num_of_residual_blocks: 6

28 changes: 18 additions & 10 deletions dqn_model.py
@@ -1,20 +1,28 @@
 import tensorflow as tf
+from coordnet_model import coord_conv


 class DqnModel:
-    def __init__(self, prefix):
+    def __init__(self, prefix, config):
         self.prefix = '{}_dqn'.format(prefix)
+        self.config = config
+        self.use_coordnet = self.config['network']['use_coordnet']

     def predict(self, workspace_image, reuse_flag):
-        conv1 = tf.layers.conv2d(workspace_image, 32, 8, 4, padding='same', activation=tf.nn.relu, use_bias=True,
-                                 name='{}_conv1'.format(self.prefix), reuse=reuse_flag)
-        conv2 = tf.layers.conv2d(conv1, 64, 4, 2, padding='same', activation=tf.nn.relu, use_bias=True,
+        if self.use_coordnet:
+            workspace_image = coord_conv(55, 111, False, workspace_image, 32, 8, 4, padding='same',
+                                         activation=tf.nn.relu, use_bias=True, name='{}_conv1'.format(self.prefix),
+                                         _reuse=reuse_flag)
+
+        conv2 = tf.layers.conv2d(workspace_image, 64, 4, 2, padding='same', activation=tf.nn.relu, use_bias=True,
                                  name='{}_conv2'.format(self.prefix), reuse=reuse_flag)
-        # conv3 = tf.layers.conv2d(conv2, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True)
-        # flat = tf.layers.flatten(conv3)
-        flat = tf.layers.flatten(conv2, name='{}_flat'.format(self.prefix))
+        conv3 = tf.layers.conv2d(conv2, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True)
+
+        flat = tf.layers.flatten(conv3, name='{}_flat'.format(self.prefix))
         dense1 = tf.layers.dense(flat, 512, activation=tf.nn.relu, name='{}_dense1'.format(self.prefix),
                                  reuse=reuse_flag)
-        dense2 = tf.layers.dense(dense1, 512, activation=None, name='{}_dense2'.format(self.prefix), reuse=reuse_flag)
-        return dense2
-
+        dense2 = tf.layers.dense(dense1, 512, activation=tf.nn.relu, name='{}_dense2'.format(self.prefix),
+                                 reuse=reuse_flag)
+        dense3 = tf.layers.dense(dense2, 512, activation=tf.nn.relu, name='{}_dense3'.format(self.prefix),
+                                 reuse=reuse_flag)
+        return dense3
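A hedged wiring sketch for the updated DqnModel signature; the config path, placeholder shape, and the 'reward' prefix are assumptions for illustration:

import tensorflow as tf
import yaml
from dqn_model import DqnModel

# assumed config path and image shape; the prefix 'reward' is an arbitrary example
with open('data/config/reward_config.yml', 'r') as yml_file:
    config = yaml.load(yml_file)

workspace_image = tf.placeholder(tf.float32, (None, 55, 111, 1), name='workspace_image')
dqn = DqnModel('reward', config)
features = dqn.predict(workspace_image, reuse_flag=False)  # 512-dimensional output of dense3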
25 changes: 25 additions & 0 deletions generate_reward_workspaces.py
@@ -0,0 +1,25 @@
from workspace_generation_utils import *
from image_cache import ImageCache
import os

TOTAL_WORKSPACES = 10000
OUTPUT_DIR = "scenario_params/vision_harder"


if not os.path.isdir(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)

generator = WorkspaceGenerator(obstacle_count_probabilities={2: 0.05, 3: 0.5, 4: 0.4, 5: 0.05})
for i in range(TOTAL_WORKSPACES):
save_path = os.path.join(OUTPUT_DIR, '{}_workspace.pkl'.format(i))

if os.path.exists(save_path):
print("workspace %d already exists" % i)
continue

print("generateing workspace %d" % i)
workspace_params = generator.generate_workspace()
workspace_params.save(save_path)

print("Creating Image Cache")
ImageCache(OUTPUT_DIR, True)
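A short read-back sketch, assuming the generated ImageCache exposes the same items/np_array interface that image_vae_model.py below relies on:

from image_cache import ImageCache

cache = ImageCache("scenario_params/vision_harder", True)
# each cached entry is assumed to carry the rendered workspace as a numpy array
images = [image.np_array for image in cache.items.values()]
print(len(images))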
194 changes: 194 additions & 0 deletions image_vae_model.py
@@ -0,0 +1,194 @@
from reward_data_manager import get_image_cache
import time
import datetime
import numpy as np
import os
import yaml
import tensorflow as tf
from vae_network import VAENetwork


class VAEModel:

def __init__(self, model_name, config, models_base_dir, tensorboard_dir):

self.model_name = model_name
self.config = config

self.model_dir = os.path.join(models_base_dir, self.model_name)
if not os.path.exists(self.model_dir):
os.makedirs(self.model_dir)

self.train_summaries = []
self.test_summaries = []

self.epochs = config['general']['epochs']
self.save_every_epochs = config['general']['save_every_epochs']
self.train_vae = config['reward']['train_vae']

inputs_example = tf.placeholder(tf.float32, (None, 55, 111), name='example')
self.network = VAENetwork(config, self.model_dir, inputs_example.shape)

self.global_step = 0
self.global_step_var = tf.Variable(0, trainable=False)

self.loss = self.init_loss()
self.optimizer = self.init_optimizer()

with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd:
yaml.dump(config, fd)

self.train_board = self.TensorBoard(tensorboard_dir, 'train_' + model_name, self.train_summaries)
self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries)

def load(self, session):
self.network.load_weights(session)

def make_feed(self, data_batch):
return self.network.make_feed(*data_batch)

def predict(self, data_batch, session):
feed = self.make_feed(data_batch)
return session.run([self.prediction], feed)[0]

def init_loss(self):
status_loss_scale = self.config['reward']['cross_entropy_coefficient']
img_loss, latent_loss, total_loss = self.network.get_loss()

image_loss_summary = tf.summary.scalar('Image_Loss', img_loss)
latent_loss_summary = tf.summary.scalar('Latent_Loss', latent_loss)

regularization_loss = tf.losses.get_regularization_loss()
regularization_loss_summary = tf.summary.scalar('Regularization_Loss', regularization_loss)

# total_loss = total_loss + regularization_loss
total_loss_summary = tf.summary.scalar('Total_Loss', total_loss)

self.train_summaries += [image_loss_summary, latent_loss_summary, regularization_loss_summary, total_loss_summary]
self.test_summaries += [image_loss_summary, latent_loss_summary, regularization_loss_summary, total_loss_summary]

return total_loss

def init_optimizer(self):
initial_learn_rate = self.config['reward']['initial_learn_rate']
decrease_learn_rate_after = self.config['reward']['decrease_learn_rate_after']
learn_rate_decrease_rate = self.config['reward']['learn_rate_decrease_rate']

learning_rate = tf.train.exponential_decay(initial_learn_rate,
self.global_step_var,
decrease_learn_rate_after,
learn_rate_decrease_rate,
staircase=True)
self.train_summaries.append(tf.summary.scalar('Learn_Rate', learning_rate))

optimizer = tf.train.AdamOptimizer(learning_rate)

gradients, variables = zip(*optimizer.compute_gradients(self.loss, tf.trainable_variables()))
initial_gradients_norm = tf.global_norm(gradients)
gradient_limit = self.config['reward']['gradient_limit']
if gradient_limit > 0.0:
gradients, _ = tf.clip_by_global_norm(gradients, gradient_limit, use_norm=initial_gradients_norm)
clipped_gradients_norm = tf.global_norm(gradients)
initial_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Initial', initial_gradients_norm)
clipped_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Clipped', clipped_gradients_norm)
self.train_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary]
self.test_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary]

return optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step_var)

def _train_batch(self, train_batch, session):
train_feed = {self.network.workspace_image_inputs: train_batch}
train_summary, self.global_step, img_loss, _ = session.run(
[self.train_board.summaries, self.global_step_var, self.network.encoded, self.optimizer],
train_feed)
# print(img_loss)
self.train_board.writer.add_summary(train_summary, self.global_step)

def _test_batch(self, test_batch, session):
test_feed = {self.network.workspace_image_inputs: test_batch}
test_summary = session.run(
[self.test_board.summaries],
test_feed)[0]
self.test_board.writer.add_summary(test_summary, self.global_step)
self.test_board.writer.flush()

def train(self, train_data, test_data, session):
session.run(tf.global_variables_initializer())
session.run(tf.local_variables_initializer())

test_every_batches = self.config['reward']['test_every_batches']

total_train_batches = 0
for epoch in range(self.epochs):

train_batch_count = 1
for train_batch in train_data:
self._train_batch(train_batch, session)
print("Finished epoch %d/%d batch %d/%d" % (epoch+1, self.epochs, train_batch_count, total_train_batches))
train_batch_count += 1

if train_batch_count % test_every_batches == 0:
test_batch = next(test_data.__iter__()) # random test batch
self._test_batch(test_batch, session)
# save the model
# self.network.save_weights(session, self.global_step)

total_train_batches = train_batch_count - 1
self.train_board.writer.flush()

test_batch = next(test_data.__iter__()) # random test batch
self._test_batch(test_batch, session)

# save the model
# if epoch == self.epochs - 1 or epoch % self.save_every_epochs == self.save_every_epochs - 1:
# self.network.save_weights(session, self.global_step)

print('done epoch {} of {}, global step {}'.format(epoch, self.epochs, self.global_step))

class TensorBoard:

def __init__(self, tensorboard_path, board_name, summaries):
self.writer = tf.summary.FileWriter(os.path.join(tensorboard_path, board_name))
self.summaries = tf.summary.merge(summaries)


def count_weights():
total_parameters = 0
for variable in tf.trainable_variables():
# shape is an array of tf.Dimension
shape = variable.get_shape()
variable_parameters = 1
for dim in shape:
variable_parameters *= dim.value
total_parameters += variable_parameters
print(total_parameters)

if __name__ == '__main__':
# read the config
config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml')
with open(config_path, 'r') as yml_file:
config = yaml.load(yml_file)
print('------------ Config ------------')
print(yaml.dump(config))

model_name = "vae" + datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')

image_cache = get_image_cache(config)
batch_size = 1
images_data = [image.np_array for image in image_cache.items.values()]
images_batch_data = [images_data[i:i+batch_size] for i in range(0, len(images_data), batch_size)]

train_data_count = int(len(images_batch_data) * 0.8)
train_data = images_batch_data[:train_data_count]
test_data = images_batch_data[train_data_count:]

models_base_dir = os.path.join('data', 'reward', 'model')
vae_model = VAEModel(model_name, config, models_base_dir, tensorboard_dir=models_base_dir)



gpu_usage = config['general']['gpu_usage']
session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_usage))
with tf.Session(config=session_config) as session:
count_weights()
vae_model.train(train_data, test_data, session)
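For reference, a worked sketch of the staircase learning-rate schedule that init_optimizer() builds with tf.train.exponential_decay, assuming the newer values in reward_config.yml above (initial_learn_rate 0.001, decrease_learn_rate_after 10000, learn_rate_decrease_rate 0.5):

# with staircase=True: lr = initial_learn_rate * learn_rate_decrease_rate ** (global_step // decrease_learn_rate_after)
initial_learn_rate = 0.001
decrease_learn_rate_after = 10000
learn_rate_decrease_rate = 0.5

for step in (0, 9999, 10000, 25000):
    lr = initial_learn_rate * learn_rate_decrease_rate ** (step // decrease_learn_rate_after)
    print(step, lr)  # 0.001, 0.001, 0.0005, 0.00025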