diff --git a/coordnet_model.py b/coordnet_model.py new file mode 100644 index 0000000..260f3aa --- /dev/null +++ b/coordnet_model.py @@ -0,0 +1,61 @@ +import tensorflow as tf +from tensorflow.python.layers import base + + +class AddCoords(base.Layer): + """Add coords to a tensor""" + def __init__(self, x_dim=64, y_dim=64, with_r=False): + super(AddCoords, self).__init__() + self.x_dim = x_dim + self.y_dim = y_dim + self.with_r = with_r + + def call(self, input_tensor, **kwargs): + """ + input_tensor: (batch, x_dim, y_dim, c) + """ + batch_size_tensor = tf.shape(input_tensor)[0] + xx_ones = tf.ones([batch_size_tensor, self.x_dim], dtype=tf.int32) + xx_ones = tf.expand_dims(xx_ones, -1) + xx_range = tf.tile(tf.expand_dims(tf.range(self.y_dim), 0), + [batch_size_tensor, 1]) + xx_range = tf.expand_dims(xx_range, 1) + xx_channel = tf.matmul(xx_ones, xx_range) + xx_channel = tf.expand_dims(xx_channel, -1) + yy_ones = tf.ones([batch_size_tensor, self.y_dim], dtype=tf.int32) + yy_ones = tf.expand_dims(yy_ones, 1) + yy_range = tf.tile(tf.expand_dims(tf.range(self.x_dim), 0), + [batch_size_tensor, 1]) + yy_range = tf.expand_dims(yy_range, -1) + yy_channel = tf.matmul(yy_range, yy_ones) + yy_channel = tf.expand_dims(yy_channel, -1) + xx_channel = tf.cast(xx_channel, 'float32') / (self.x_dim - 1) + yy_channel = tf.cast(yy_channel, 'float32') / (self.y_dim - 1) + xx_channel = xx_channel*2 - 1 + yy_channel = yy_channel*2 - 1 + ret = tf.concat([input_tensor, xx_channel, yy_channel], axis=-1) + if self.with_r: + rr = tf.sqrt(tf.square(xx_channel) + tf.square(yy_channel)) + ret = tf.concat([ret, rr], axis=-1) + return ret + + +class CoordConv(base.Layer): + """CoordConv layer as in the paper.""" + def __init__(self, x_dim, y_dim, with_r, *args, **kwargs): + super(CoordConv, self).__init__() + self.addcoords = AddCoords(x_dim=x_dim, + y_dim=y_dim, + with_r=with_r) + self.conv = tf.layers.Conv2D(*args, **kwargs) + + def call(self, input_tensor, **kwargs): + ret = self.addcoords(input_tensor) + ret = self.conv(ret) + return ret + + +def coord_conv(x_dim, y_dim, with_r, inputs, *args, **kwargs): + layer = CoordConv(x_dim, y_dim, with_r, *args, **kwargs) + return layer.apply(inputs) + diff --git a/data/config/reward_config.yml b/data/config/reward_config.yml index 4406e29..5dc05fe 100644 --- a/data/config/reward_config.yml +++ b/data/config/reward_config.yml @@ -1,6 +1,4 @@ general: - epochs: 5000 - save_every_epochs: 1 # gpu_usage: 0.1 gpu_usage: 0.8 # with vision # scenario: 'no_obstacles' @@ -8,7 +6,7 @@ general: # scenario: 'hard' # scenario: 'vision' scenario: 'vision_harder' - number_of_unzippers: 10 + number_of_unzippers: 1 openrave_rl: action_step_size: 0.025 @@ -21,24 +19,31 @@ openrave_rl: truncate_penalty: 0.05 model: -# batch_size: 10240 - batch_size: 2000 # with vision potential_points: [2, 0., 0.075, 3, 0., 0.085, 4, -0.02, 0.05, 4, 0.005, 0.05, 5, 0.005, 0.035, 5, -0.02, 0.035] consider_goal_pose: True + goal_configuration_distance_sensitivity: 1.23 -reward: +train: + epochs: 20 + save_every_epochs: 1 + # batch_size: 10240 + batch_size: 300 # with vision + test_every_batches: 1000 initial_learn_rate: 0.001 -# initial_learn_rate: 0.01 - decrease_learn_rate_after: 2000 -# decrease_learn_rate_after: 10000 -# learn_rate_decrease_rate: 0.8 - learn_rate_decrease_rate: 1.0 + decrease_learn_rate_after: 10000 + learn_rate_decrease_rate: 0.5 gradient_limit: 5.0 -# gradient_limit: 0.0 - layers: [100, 100, 100, 100] -# layers: [200, 200, 200, 200, 200] l2_regularization_coefficient: 0.0001 
cross_entropy_coefficient: 1.0 + oversample_goal: 0.2 + oversample_collision: 0.38 + +network: + layers: [300, 300, 300, 200, 100, 100] activation: 'elu' - oversample_goal: 1.0 - oversample_collision: 1.0 + use_coordnet: true + image_network: 'resnet' + train_vae: true + vae_latent_dim: 150 + resnet_num_of_residual_blocks: 6 + diff --git a/dqn_model.py b/dqn_model.py index 0bb542f..1431147 100644 --- a/dqn_model.py +++ b/dqn_model.py @@ -1,20 +1,28 @@ import tensorflow as tf +from coordnet_model import coord_conv class DqnModel: - def __init__(self, prefix): + def __init__(self, prefix, config): self.prefix = '{}_dqn'.format(prefix) + self.config = config + self.use_coordnet = self.config['network']['use_coordnet'] def predict(self, workspace_image, reuse_flag): - conv1 = tf.layers.conv2d(workspace_image, 32, 8, 4, padding='same', activation=tf.nn.relu, use_bias=True, - name='{}_conv1'.format(self.prefix), reuse=reuse_flag) - conv2 = tf.layers.conv2d(conv1, 64, 4, 2, padding='same', activation=tf.nn.relu, use_bias=True, + if self.use_coordnet: + workspace_image = coord_conv(55, 111, False, workspace_image, 32, 8, 4, padding='same', + activation=tf.nn.relu, use_bias=True, name='{}_conv1'.format(self.prefix), + _reuse=reuse_flag) + + conv2 = tf.layers.conv2d(workspace_image, 64, 4, 2, padding='same', activation=tf.nn.relu, use_bias=True, name='{}_conv2'.format(self.prefix), reuse=reuse_flag) - # conv3 = tf.layers.conv2d(conv2, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True) - # flat = tf.layers.flatten(conv3) - flat = tf.layers.flatten(conv2, name='{}_flat'.format(self.prefix)) + conv3 = tf.layers.conv2d(conv2, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True) + + flat = tf.layers.flatten(conv3, name='{}_flat'.format(self.prefix)) dense1 = tf.layers.dense(flat, 512, activation=tf.nn.relu, name='{}_dense1'.format(self.prefix), reuse=reuse_flag) - dense2 = tf.layers.dense(dense1, 512, activation=None, name='{}_dense2'.format(self.prefix), reuse=reuse_flag) - return dense2 - + dense2 = tf.layers.dense(dense1, 512, activation=tf.nn.relu, name='{}_dense2'.format(self.prefix), + reuse=reuse_flag) + dense3 = tf.layers.dense(dense2, 512, activation=tf.nn.relu, name='{}_dense3'.format(self.prefix), + reuse=reuse_flag) + return dense3 diff --git a/generate_reward_workspaces.py b/generate_reward_workspaces.py new file mode 100644 index 0000000..05788bb --- /dev/null +++ b/generate_reward_workspaces.py @@ -0,0 +1,25 @@ +from workspace_generation_utils import * +from image_cache import ImageCache +import os + +TOTAL_WORKSPACES = 10000 +OUTPUT_DIR = "scenario_params/vision_harder" + + +if not os.path.isdir(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) + +generator = WorkspaceGenerator(obstacle_count_probabilities={2: 0.05, 3: 0.5, 4: 0.4, 5: 0.05}) +for i in range(TOTAL_WORKSPACES): + save_path = os.path.join(OUTPUT_DIR, '{}_workspace.pkl'.format(i)) + + if os.path.exists(save_path): + print("workspace %d already exists" % i) + continue + + print("generateing workspace %d" % i) + workspace_params = generator.generate_workspace() + workspace_params.save(save_path) + +print("Creating Image Cache") +ImageCache(OUTPUT_DIR, True) diff --git a/image_vae_model.py b/image_vae_model.py new file mode 100644 index 0000000..3f1aef5 --- /dev/null +++ b/image_vae_model.py @@ -0,0 +1,194 @@ +from reward_data_manager import get_image_cache +import time +import datetime +import numpy as np +import os +import yaml +import tensorflow as tf +from vae_network import VAENetwork + + +class VAEModel: + + 
def __init__(self, model_name, config, models_base_dir, tensorboard_dir): + + self.model_name = model_name + self.config = config + + self.model_dir = os.path.join(models_base_dir, self.model_name) + if not os.path.exists(self.model_dir): + os.makedirs(self.model_dir) + + self.train_summaries = [] + self.test_summaries = [] + + self.epochs = config['general']['epochs'] + self.save_every_epochs = config['general']['save_every_epochs'] + self.train_vae = config['reward']['train_vae'] + + inputs_example = tf.placeholder(tf.float32, (None, 55, 111), name='example') + self.network = VAENetwork(config, self.model_dir, inputs_example.shape) + + self.global_step = 0 + self.global_step_var = tf.Variable(0, trainable=False) + + self.loss = self.init_loss() + self.optimizer = self.init_optimizer() + + with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd: + yaml.dump(config, fd) + + self.train_board = self.TensorBoard(tensorboard_dir, 'train_' + model_name, self.train_summaries) + self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries) + + def load(self, session): + self.network.load_weights(session) + + def make_feed(self, data_batch): + return self.network.make_feed(*data_batch) + + def predict(self, data_batch, session): + feed = self.make_feed(data_batch) + return session.run([self.prediction], feed)[0] + + def init_loss(self): + status_loss_scale = self.config['reward']['cross_entropy_coefficient'] + img_loss, latent_loss, total_loss = self.network.get_loss() + + image_loss_summary = tf.summary.scalar('Image_Loss', img_loss) + latent_loss_summary = tf.summary.scalar('Latent_Loss', latent_loss) + + regularization_loss = tf.losses.get_regularization_loss() + regularization_loss_summary = tf.summary.scalar('Regularization_Loss', regularization_loss) + + # total_loss = total_loss + regularization_loss + total_loss_summary = tf.summary.scalar('Total_Loss', total_loss) + + self.train_summaries += [image_loss_summary, latent_loss_summary, regularization_loss_summary, total_loss_summary] + self.test_summaries += [image_loss_summary, latent_loss_summary, regularization_loss_summary, total_loss_summary] + + return total_loss + + def init_optimizer(self): + initial_learn_rate = self.config['reward']['initial_learn_rate'] + decrease_learn_rate_after = self.config['reward']['decrease_learn_rate_after'] + learn_rate_decrease_rate = self.config['reward']['learn_rate_decrease_rate'] + + learning_rate = tf.train.exponential_decay(initial_learn_rate, + self.global_step_var, + decrease_learn_rate_after, + learn_rate_decrease_rate, + staircase=True) + self.train_summaries.append(tf.summary.scalar('Learn_Rate', learning_rate)) + + optimizer = tf.train.AdamOptimizer(learning_rate) + + gradients, variables = zip(*optimizer.compute_gradients(self.loss, tf.trainable_variables())) + initial_gradients_norm = tf.global_norm(gradients) + gradient_limit = self.config['reward']['gradient_limit'] + if gradient_limit > 0.0: + gradients, _ = tf.clip_by_global_norm(gradients, gradient_limit, use_norm=initial_gradients_norm) + clipped_gradients_norm = tf.global_norm(gradients) + initial_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Initial', initial_gradients_norm) + clipped_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Clipped', clipped_gradients_norm) + self.train_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary] + self.test_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary] + + return 
optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step_var) + + def _train_batch(self, train_batch, session): + train_feed = {self.network.workspace_image_inputs: train_batch} + train_summary, self.global_step, img_loss, _ = session.run( + [self.train_board.summaries, self.global_step_var, self.network.encoded, self.optimizer], + train_feed) + # print(img_loss) + self.train_board.writer.add_summary(train_summary, self.global_step) + + def _test_batch(self, test_batch, session): + test_feed = {self.network.workspace_image_inputs: test_batch} + test_summary = session.run( + [self.test_board.summaries], + test_feed)[0] + self.test_board.writer.add_summary(test_summary, self.global_step) + self.test_board.writer.flush() + + def train(self, train_data, test_data, session): + session.run(tf.global_variables_initializer()) + session.run(tf.local_variables_initializer()) + + test_every_batches = self.config['reward']['test_every_batches'] + + total_train_batches = 0 + for epoch in range(self.epochs): + + train_batch_count = 1 + for train_batch in train_data: + self._train_batch(train_batch, session) + print("Finished epoch %d/%d batch %d/%d" % (epoch+1, self.epochs, train_batch_count, total_train_batches)) + train_batch_count += 1 + + if train_batch_count % test_every_batches == 0: + test_batch = next(test_data.__iter__()) # random test batch + self._test_batch(test_batch, session) + # save the model + # self.network.save_weights(session, self.global_step) + + total_train_batches = train_batch_count - 1 + self.train_board.writer.flush() + + test_batch = next(test_data.__iter__()) # random test batch + self._test_batch(test_batch, session) + + # save the model + # if epoch == self.epochs - 1 or epoch % self.save_every_epochs == self.save_every_epochs - 1: + # self.network.save_weights(session, self.global_step) + + print('done epoch {} of {}, global step {}'.format(epoch, self.epochs, self.global_step)) + + class TensorBoard: + + def __init__(self, tensorboard_path, board_name, summaries): + self.writer = tf.summary.FileWriter(os.path.join(tensorboard_path, board_name)) + self.summaries = tf.summary.merge(summaries) + + +def count_weights(): + total_parameters = 0 + for variable in tf.trainable_variables(): + # shape is an array of tf.Dimension + shape = variable.get_shape() + variable_parameters = 1 + for dim in shape: + variable_parameters *= dim.value + total_parameters += variable_parameters + print(total_parameters) + +if __name__ == '__main__': + # read the config + config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml') + with open(config_path, 'r') as yml_file: + config = yaml.load(yml_file) + print('------------ Config ------------') + print(yaml.dump(config)) + + model_name = "vae" + datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') + + image_cache = get_image_cache(config) + batch_size = 1 + images_data = [image.np_array for image in image_cache.items.values()] + images_batch_data = [images_data[i:i+batch_size] for i in range(0, len(images_data), batch_size)] + + train_data_count = int(len(images_batch_data) * 0.8) + train_data = images_batch_data[:train_data_count] + test_data = images_batch_data[train_data_count:] + + models_base_dir = os.path.join('data', 'reward', 'model') + vae_model = VAEModel(model_name, config, models_base_dir, tensorboard_dir=models_base_dir) + + + + gpu_usage = config['general']['gpu_usage'] + session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_usage)) 
+ with tf.Session(config=session_config) as session: + count_weights() + vae_model.train(train_data, test_data, session) diff --git a/pre_trained_reward.py b/pre_trained_reward.py deleted file mode 100644 index 37725c7..0000000 --- a/pre_trained_reward.py +++ /dev/null @@ -1,244 +0,0 @@ -import bz2 -import numpy as np -import random -import pickle -import os -import tensorflow as tf -import tensorflow.contrib.layers as tf_layers - -from dqn_model import DqnModel -from modeling_utils import get_activation - - -class PreTrainedReward: - - def __init__(self, model_name, config): - self._reuse_flag = False - - self.config = config - self.is_vision_enabled = 'vision' in config['general']['scenario'] - - self.joints_inputs = tf.placeholder(tf.float32, (None, 4), name='joints_inputs') - self.goal_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='goal_joints_inputs') - self.workspace_image_inputs, self.images_3d = None, None - if self.is_vision_enabled: - self.workspace_image_inputs = tf.placeholder(tf.float32, (None, 55, 111), name='workspace_image_inputs') - self.images_3d = tf.expand_dims(self.workspace_image_inputs, axis=-1) - self.goal_pose_inputs = tf.placeholder(tf.float32, (None, 2), name='goal_pose_inputs') - self.action_inputs = tf.placeholder(tf.float32, (None, 4), name='action_inputs') - self.transition_label = tf.placeholder_with_default([[0.0]*3], (None, 3), name='labeled_transition') - current_variables_count = len(tf.trainable_variables()) - self.reward_prediction, self.status_softmax_logits = self.create_reward_network( - self.joints_inputs, self.action_inputs, self.goal_joints_inputs, self.goal_pose_inputs, self.images_3d - ) - reward_variables = tf.trainable_variables()[current_variables_count:] - - # model path to load - self.model_name = model_name - self.saver_dir = os.path.join(os.getcwd(), 'data', 'reward', 'model', model_name) - assert os.path.exists(self.saver_dir) - self.saver = tf.train.Saver(reward_variables, max_to_keep=4, save_relative_paths=self.saver_dir) - - @staticmethod - def _generate_goal_features(goal_joints_inputs, goal_pose_inputs): - features = [goal_joints_inputs] - if goal_pose_inputs is not None: - features.append(goal_pose_inputs) - return tf.concat(features, axis=1) - - def _next_state_model(self, joints_inputs, action_inputs): - # next step is a deterministic computation - action_step_size = self.config['openrave_rl']['action_step_size'] - step = action_inputs * action_step_size - unclipped_result = joints_inputs + step - # we initiate an openrave manager to get the robot, to get the joint bounds and the safety - joint_safety = 0.0001 - lower_bounds = [-2.617, -1.571, -1.571, -1.745, -2.617] - lower_bounds = [b + joint_safety for b in lower_bounds[1:]] - upper_bounds = [-b for b in lower_bounds] - - # clip the result - clipped_result = tf.maximum(unclipped_result, lower_bounds) - clipped_result = tf.minimum(clipped_result, upper_bounds) - return clipped_result, unclipped_result - - def create_reward_network( - self, joints_inputs, action_inputs, goal_joints_inputs, goal_pose_inputs, images_3d): - name_prefix = 'reward' - # get the next joints - clipped_next_joints, unclipped_next_joints = self._next_state_model(joints_inputs, action_inputs) - - # predict the transition classification - layers = self.config['reward']['layers'] + [3] - scale = 0.0 - if 'l2_regularization_coefficient' in self.config['reward']: - scale = self.config['reward']['l2_regularization_coefficient'] - current = tf.concat( - (clipped_next_joints, 
self._generate_goal_features(goal_joints_inputs, goal_pose_inputs)), axis=1) - # add vision if needed - if self.is_vision_enabled: - visual_inputs = DqnModel(name_prefix).predict(images_3d, self._reuse_flag) - current = tf.concat((current, visual_inputs), axis=1) - for i, layer_size in enumerate(layers): - _activation = None if i == len(layers) - 1 else get_activation(self.config['reward']['activation']) - current = tf.layers.dense( - current, layer_size, activation=_activation, name='{}_layers_{}'.format(name_prefix, i), - kernel_regularizer=tf_layers.l2_regularizer(scale), reuse=self._reuse_flag - ) - softmax_logits = current - softmax_res = tf.nn.softmax(softmax_logits) - - # if the one-hot input is fed, is labeled will be 1.0 otherwise it will be zero - is_labeled = tf.expand_dims(tf.reduce_max(self.transition_label, axis=1), axis=1) - reward_calculation_input = self.transition_label + tf.multiply(1.0 - is_labeled, softmax_res) - - # get the classification reward - classification_reward = tf.layers.dense( - reward_calculation_input, 1, activation=None, use_bias=False, - name='{}_classification_reward'.format(name_prefix), reuse=self._reuse_flag - ) - - # get the clipping-related reward - # clipped_difference = tf.expand_dims(tf.norm(unclipped_next_joints - clipped_next_joints, axis=1), axis=1) # this is the original - # clipped_difference = tf.expand_dims(tf.reduce_sum(tf.zeros_like(clipped_next_joints), axis=1), axis=1) # this will have no gradient backlash - clipped_difference = tf.expand_dims(tf.reduce_sum(tf.abs(unclipped_next_joints - clipped_next_joints), axis=1), axis=1) - - clipping_reward = tf.layers.dense( - clipped_difference, 1, activation=None, use_bias=False, name='{}_clipping_weight'.format(name_prefix), - reuse=self._reuse_flag - ) - - total_reward = classification_reward + clipping_reward - self._reuse_flag = True - return total_reward, softmax_logits - - def load_weights(self, sess): - self.saver.restore(sess, tf.train.latest_checkpoint(self.saver_dir)) - - def make_prediction(self, sess, all_start_joints, all_goal_joints, all_actions, all_goal_poses, - all_transition_labels=None, images=None): - feed = self.make_feed(all_start_joints, all_goal_joints, all_actions, all_goal_poses, images=images, - all_transition_labels=all_transition_labels) - return sess.run([self.reward_prediction, self.status_softmax_logits], feed) - - def make_feed(self, all_start_joints, all_goal_joints, all_actions, all_goal_poses, images=None, - all_transition_labels=None): - feed = { - self.joints_inputs: all_start_joints, - self.goal_joints_inputs: all_goal_joints, - self.action_inputs: all_actions, - } - if self.goal_pose_inputs is not None: - feed[self.goal_pose_inputs] = all_goal_poses - if self.is_vision_enabled: - assert images is not None - assert images[0] is not None - feed[self.workspace_image_inputs] = images - if all_transition_labels is not None: - feed[self.transition_label] = all_transition_labels - return feed - - -def oversample_batch(current_batch, oversample_large_magnitude=None): - if oversample_large_magnitude is None: - return current_batch - oversample_success = oversample_large_magnitude[0] - oversample_collision = oversample_large_magnitude[1] - status = [b[-1] for b in current_batch] - success_reward_indices = [i for i, s in enumerate(status) if s == 3] - if len(success_reward_indices) < 3: - return None - collision_reward_indices = [i for i, s in enumerate(status) if s == 2] - if len(collision_reward_indices) < 3: - return None - other_reward_indices = [i for i, s in 
enumerate(status) if s == 1] - assert len(success_reward_indices) + len(collision_reward_indices) + len(other_reward_indices) == len(current_batch) - sample_size = len(other_reward_indices) - batch_indices = other_reward_indices - # sample_size = min(100, len(other_reward_indices)) - # batch_indices = list(np.random.choice(other_reward_indices, sample_size)) - success_sample_size = int(oversample_success * sample_size) - success_super_sample = list(np.random.choice(success_reward_indices, success_sample_size)) - batch_indices.extend(success_super_sample) - collision_sample_size = int(oversample_collision * sample_size) - collision_super_sample = list(np.random.choice(collision_reward_indices, collision_sample_size)) - batch_indices.extend(collision_super_sample) - return [current_batch[i] for i in batch_indices] - - -def get_batch_and_labels(batch, openrave_manager, image_cache): - all_start_joints = [] - all_goal_joints = [] - all_actions = [] - all_rewards = [] - all_goal_poses = [] - all_status = [] - all_images = None - if image_cache is not None: - all_images = [] - for i in range(len(batch)): - if image_cache is None: - workspace_id = None - start_joints, goal_joints, action, next_joints, reward, terminated, status = batch[i] - else: - workspace_id, start_joints, goal_joints, action, next_joints, reward, terminated, status = batch[i] - goal_pose = openrave_manager.get_target_pose(goal_joints) - all_start_joints.append(start_joints[1:]) - all_goal_joints.append(goal_joints[1:]) - all_actions.append(action[1:]) - all_rewards.append(reward) - all_status.append(status) - all_goal_poses.append(goal_pose) - if image_cache is not None: - image = image_cache.items[workspace_id].np_array - all_images.append(image) - return [all_start_joints, all_goal_joints, all_actions, all_goal_poses, all_images], all_rewards, all_status - - -def compute_stats_single_class(real_status, real_reward, status_prediction, reward_prediction, class_indicator): - class_indices = [i for i, s in enumerate(real_status) if s == class_indicator] - if len(class_indices) == 0: - accuracy = 0.0 - class_average_absolute_error = 0.0 - class_max_absolute_error = 0.0 - else: - my_status_prediction = [status_prediction[i] for i in class_indices] - best_label = np.argmax(my_status_prediction, axis=1) - best_label += 1 - hit_cont = len([i for i, b in enumerate(best_label) if b == class_indicator]) - accuracy = float(hit_cont) / len(class_indices) - difference = [np.abs(real_reward[i] - reward_prediction[i]) for i in class_indices] - class_average_absolute_error = np.mean(difference) - class_max_absolute_error = np.max(difference) - return class_indices, [class_average_absolute_error, class_max_absolute_error, accuracy] - - -def compute_stats_per_class(real_status, real_reward, status_prediction, reward_prediction): - goal_rewards_indices, goal_stats = compute_stats_single_class( - real_status, real_reward, status_prediction, reward_prediction, 3) - collision_rewards_indices, collision_stats = compute_stats_single_class( - real_status, real_reward, status_prediction, reward_prediction, 2) - other_rewards_indices, other_stats = compute_stats_single_class( - real_status, real_reward, status_prediction, reward_prediction, 1) - assert len(goal_rewards_indices) + len(collision_rewards_indices) + len(other_rewards_indices) == len(real_reward) - return goal_stats, collision_stats, other_stats - - -def load_data_from(data_dir, max_read=None, is_vision=False): - assert os.path.exists(data_dir) - files = [file for file in 
os.listdir(data_dir) if file.endswith(".pkl")] - assert len(files) > 0 - random.shuffle(files) - total_buffer = [] - for file in files: - if max_read is not None and len(total_buffer) > max_read: - break - compressed_file = bz2.BZ2File(os.path.join(data_dir, file), 'r') - current_buffer = pickle.load(compressed_file) - compressed_file.close() - if is_vision: - parts = file.split('_') - workspace_id = '{}_{}.pkl'.format(parts[0], parts[1]) - current_buffer = [tuple([workspace_id] + list(t)) for t in current_buffer] - total_buffer.extend(current_buffer) - return total_buffer diff --git a/resnet_model.py b/resnet_model.py new file mode 100644 index 0000000..a592b67 --- /dev/null +++ b/resnet_model.py @@ -0,0 +1,241 @@ +# Most of the code in this model was taken from: https://github.com/wenxinxu/resnet-in-tensorflow/blob/master/resnet.py + +import tensorflow as tf +from coordnet_model import coord_conv +import numpy as np +import yaml +import os +import datetime +import time + +BN_EPSILON = 0.001 + + +class ResNetModel: + def __init__(self, prefix, config): + self.prefix = '{}_resnet'.format(prefix) + self.config = config + self.use_coordnet = self.config['network']['use_coordnet'] + self.l2_regularization_coefficient = self.config['train']['l2_regularization_coefficient'] + + def activation_summary(self, x): + """ + :param x: A Tensor + :return: Add histogram summary and scalar summary of the sparsity of the tensor + """ + tensor_name = x.op.name + tf.summary.histogram(tensor_name + '/activations', x) + tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x)) + + def create_variables(self, name, shape, initializer=tf.contrib.layers.xavier_initializer(), is_fc_layer=False): + """ + :param name: A string. The name of the new variable + :param shape: A list of dimensions + :param initializer: User Xavier as default. + :param is_fc_layer: Want to create fc layer variable? May use different weight_decay for fc + layers. + :return: The created variable + """ + + ## TODO: to allow different weight decay to fully connected layer and conv layer + regularizer = tf.contrib.layers.l2_regularizer(scale=self.l2_regularization_coefficient) + + new_variables = tf.get_variable(name, shape=shape, initializer=initializer, + regularizer=regularizer) + return new_variables + + def output_layer(self, input_layer, num_labels): + """ + :param input_layer: 2D tensor + :param num_labels: int. How many output labels in total? (10 for cifar10 and 100 for cifar100) + :return: output layer Y = WX + B + """ + input_dim = input_layer.get_shape().as_list()[-1] + fc_w = self.create_variables(name='fc_weights', shape=[input_dim, num_labels], is_fc_layer=True, + initializer=tf.uniform_unit_scaling_initializer(factor=1.0)) + fc_b = self.create_variables(name='fc_bias', shape=[num_labels], initializer=tf.zeros_initializer()) + + fc_h = tf.matmul(input_layer, fc_w) + fc_b + return fc_h + + def batch_normalization_layer(self, input_layer, dimension): + """ + Helper function to do batch normalziation + :param input_layer: 4D tensor + :param dimension: input_layer.get_shape().as_list()[-1]. 
The depth of the 4D tensor + :return: the 4D tensor after being normalized + """ + mean, variance = tf.nn.moments(input_layer, axes=[0, 1, 2]) + beta = tf.get_variable('beta', dimension, tf.float32, + initializer=tf.constant_initializer(0.0, tf.float32)) + gamma = tf.get_variable('gamma', dimension, tf.float32, + initializer=tf.constant_initializer(1.0, tf.float32)) + bn_layer = tf.nn.batch_normalization(input_layer, mean, variance, beta, gamma, BN_EPSILON) + + return bn_layer + + def conv_bn_relu_layer(self, input_layer, filter_shape, stride): + """ + A helper function to conv, batch normalize and relu the input tensor sequentially + :param input_layer: 4D tensor + :param filter_shape: list. [filter_height, filter_width, filter_depth, filter_number] + :param stride: stride size for conv + :return: 4D tensor. Y = Relu(batch_normalize(conv(X))) + """ + + out_channel = filter_shape[-1] + + if self.use_coordnet: + conv_layer = coord_conv(55, 111, False, input_layer, filter_shape[-1], filter_shape[0:2], stride, + padding='same', use_bias=True, name='{}_conv1'.format(self.prefix)) + else: + filter = self.create_variables(name='conv', shape=filter_shape) + conv_layer = tf.nn.conv2d(input_layer, filter, strides=[1, stride, stride, 1], padding='SAME') + bn_layer = self.batch_normalization_layer(conv_layer, out_channel) + + output = tf.nn.relu(bn_layer) + return output + + def bn_relu_conv_layer(self, input_layer, filter_shape, stride): + """ + A helper function to batch normalize, relu and conv the input layer sequentially + :param input_layer: 4D tensor + :param filter_shape: list. [filter_height, filter_width, filter_depth, filter_number] + :param stride: stride size for conv + :return: 4D tensor. Y = conv(Relu(batch_normalize(X))) + """ + + in_channel = input_layer.get_shape().as_list()[-1] + + bn_layer = self.batch_normalization_layer(input_layer, in_channel) + relu_layer = tf.nn.relu(bn_layer) + + filter = self.create_variables(name='conv', shape=filter_shape) + conv_layer = tf.nn.conv2d(relu_layer, filter, strides=[1, stride, stride, 1], padding='SAME') + return conv_layer + + def residual_block(self, input_layer, output_channel, first_block=False): + """ + Defines a residual block in ResNet + :param input_layer: 4D tensor + :param output_channel: int. return_tensor.get_shape().as_list()[-1] = output_channel + :param first_block: if this is the first residual block of the whole network + :return: 4D tensor. + """ + input_channel = input_layer.get_shape().as_list()[-1] + + # When it's time to "shrink" the image size, we use stride = 2 + if input_channel * 2 == output_channel: + increase_dim = True + stride = 2 + elif input_channel == output_channel: + increase_dim = False + stride = 1 + else: + raise ValueError('Output and input channel does not match in residual blocks!!!') + + # The first conv layer of the first residual block does not need to be normalized and relu-ed. 
+ with tf.variable_scope('conv1_in_block'): + if first_block: + filter = self.create_variables(name='conv', shape=[3, 3, input_channel, output_channel]) + conv1 = tf.nn.conv2d(input_layer, filter=filter, strides=[1, 1, 1, 1], padding='SAME') + else: + conv1 = self.bn_relu_conv_layer(input_layer, [3, 3, input_channel, output_channel], stride) + + with tf.variable_scope('conv2_in_block'): + conv2 = self.bn_relu_conv_layer(conv1, [3, 3, output_channel, output_channel], 1) + + # When the channels of input layer and conv2 does not match, we add zero pads to increase the + # depth of input layers + if increase_dim is True: + pooled_input = tf.nn.avg_pool(input_layer, ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], padding='VALID') + padding_w = input_layer.shape[1].value % 2 + padding_h = input_layer.shape[2].value % 2 + padded_input = tf.pad(pooled_input, [[0, 0], [padding_w, 0], [padding_h, 0], [input_channel // 2, + input_channel // 2]]) + else: + padded_input = input_layer + + output = conv2 + padded_input + return output + + def predict(self, input_tensor_batch, n, reuse): + """ + The main function that defines the ResNet. total layers = 1 + 2n + 2n + 2n +1 = 6n + 2 + :param input_tensor_batch: 4D tensor + :param n: num_residual_blocks + :param reuse: To build train graph, reuse=False. To build validation graph and share weights + with train graph, resue=True + :return: last layer in the network. Not softmax-ed + """ + + layers = [] + with tf.variable_scope('conv0', reuse=reuse): + if self.use_coordnet: + first_filter_shape = [3, 3, 3, 16] + else: + first_filter_shape = [3, 3, 1, 16] + conv0 = self.conv_bn_relu_layer(input_tensor_batch, first_filter_shape, 1) + self.activation_summary(conv0) + layers.append(conv0) + + for i in range(n): + with tf.variable_scope('conv1_%d' % i, reuse=reuse): + if i == 0: + conv1 = self.residual_block(layers[-1], 16, first_block=True) + else: + conv1 = self.residual_block(layers[-1], 16) + self.activation_summary(conv1) + layers.append(conv1) + + for i in range(n): + with tf.variable_scope('conv2_%d' % i, reuse=reuse): + conv2 = self.residual_block(layers[-1], 32) + self.activation_summary(conv2) + layers.append(conv2) + + for i in range(n): + with tf.variable_scope('conv3_%d' % i, reuse=reuse): + conv3 = self.residual_block(layers[-1], 64) + layers.append(conv3) + # assert conv3.get_shape().as_list()[1:] == [8, 8, 64] + + with tf.variable_scope('fc', reuse=reuse): + in_channel = layers[-1].get_shape().as_list()[-1] + bn_layer = self.batch_normalization_layer(layers[-1], in_channel) + relu_layer = tf.nn.relu(bn_layer) + global_pool = tf.reduce_mean(relu_layer, [1, 2]) + + assert global_pool.get_shape().as_list()[-1:] == [64] + output = self.output_layer(global_pool, 10) + layers.append(output) + + return layers[-1] + + def test_graph(self, train_dir='logs'): + """ + Run this function to look at the graph structure on tensorboard. A fast way! 
+ :param train_dir: + """ + if self.use_coordnet: + input_tensor = tf.constant(np.ones([111, 55, 32, 3]), dtype=tf.float32) + else: + input_tensor = tf.constant(np.ones([128, 32, 32, 1]), dtype=tf.float32) + result = self.predict(input_tensor, 2, reuse=False) + init = tf.initialize_all_variables() + sess = tf.Session() + sess.run(init) + summary_writer = tf.summary.FileWriter(train_dir, graph=sess.graph) + + +if __name__ == '__main__': + # read the config + config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml') + with open(config_path, 'r') as yml_file: + config = yaml.load(yml_file) + print('------------ Config ------------') + print(yaml.dump(config)) + model_name = "renset_" + datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') + resnet = ResNetModel(model_name, config) + resnet.test_graph() diff --git a/reward_collision_model.py b/reward_collision_model.py new file mode 100644 index 0000000..ebb8093 --- /dev/null +++ b/reward_collision_model.py @@ -0,0 +1,206 @@ +from reward_collision_network import CollisionNetwork +import reward_data_manager +from reward_data_manager import get_train_and_test_datasets, get_batch_and_labels, get_image_cache +import time +import datetime +import numpy as np +import os +import yaml +import tensorflow as tf + + +class CollisionModel: + + def __init__(self, model_name, config, models_base_dir, tensorboard_dir): + + self.model_name = model_name + self.config = config + + self.model_dir = os.path.join(models_base_dir, self.model_name) + if not os.path.exists(self.model_dir): + os.makedirs(self.model_dir) + + self.train_summaries = [] + self.test_summaries = [] + + self.epochs = config['train']['epochs'] + self.save_every_epochs = config['train']['save_every_epochs'] + + self.network = CollisionNetwork(config, self.model_dir) + self.net_output = self.network.status_softmax_logits + self.status_input = tf.placeholder(tf.int32, [None, ]) + # TODO: fix collision_prob + self.collision_prob = tf.expand_dims(tf.nn.softmax(self.net_output)[:, 1], -1) + self.prediction = tf.argmax(tf.nn.softmax(self.net_output), axis=1, output_type=tf.int32) + + self.global_step = 0 + self.global_step_var = tf.Variable(0, trainable=False) + + self.loss = self.init_loss() + self.test_measures = self.add_test_measures() + self.optimizer = self.init_optimizer() + + with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd: + yaml.dump(config, fd) + + self.train_board = self.TensorBoard(tensorboard_dir, 'train_' + model_name, self.train_summaries) + self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries) + + def load(self, session): + self.network.load_weights(session) + + def make_feed(self, data_batch): + return self.network.make_feed(*data_batch) + + def predict(self, data_batch, session): + feed = self.make_feed(data_batch) + return session.run([self.prediction], feed)[0] + + def init_loss(self): + status_loss_scale = self.config['train']['cross_entropy_coefficient'] + status_loss = status_loss_scale * \ + tf.losses.sparse_softmax_cross_entropy(labels=self.status_input - 1, logits=self.net_output) + status_loss_summary = tf.summary.scalar('Status_Loss', status_loss) + + regularization_loss = tf.losses.get_regularization_loss() + regularization_loss_summary = tf.summary.scalar('Regularization_Loss', regularization_loss) + + total_loss = status_loss + regularization_loss + total_loss_summary = tf.summary.scalar('Total_Loss', total_loss) + + self.train_summaries += [status_loss_summary, 
regularization_loss_summary, total_loss_summary] + self.test_summaries += [status_loss_summary, regularization_loss_summary, total_loss_summary] + + return total_loss + + def add_test_measures(self): + labels = self.status_input - 1 + accuracy = tf.reduce_mean(tf.cast(tf.equal(labels, self.prediction), tf.float32)) + TP = tf.count_nonzero((self.prediction) * (labels), dtype=tf.float32) + tf.Variable(0.001) + TN = tf.count_nonzero((self.prediction - 1) * (labels - 1), dtype=tf.float32) + tf.Variable(0.001) + FP = tf.count_nonzero(self.prediction * (labels - 1), dtype=tf.float32) + tf.Variable(0.001) + FN = tf.count_nonzero((self.prediction - 1) * labels, dtype=tf.float32) + tf.Variable(0.001) + precision = TP / (TP + FP) + recall = TP / (TP + FN) + accuracy_summary = tf.summary.scalar('Accuracy', accuracy) + recall_summary = tf.summary.scalar('Recall', recall) + precision_summary = tf.summary.scalar('Precision', precision) + self.test_summaries += [accuracy_summary, recall_summary, precision_summary] + self.train_summaries += [accuracy_summary, recall_summary, precision_summary] + return [accuracy, recall, precision] + + def init_optimizer(self): + initial_learn_rate = self.config['train']['initial_learn_rate'] + decrease_learn_rate_after = self.config['train']['decrease_learn_rate_after'] + learn_rate_decrease_rate = self.config['train']['learn_rate_decrease_rate'] + + learning_rate = tf.train.exponential_decay(initial_learn_rate, + self.global_step_var, + decrease_learn_rate_after, + learn_rate_decrease_rate, + staircase=True) + self.train_summaries.append(tf.summary.scalar('Learn_Rate', learning_rate)) + + optimizer = tf.train.AdamOptimizer(learning_rate) + + gradients, variables = zip(*optimizer.compute_gradients(self.loss, tf.trainable_variables())) + initial_gradients_norm = tf.global_norm(gradients) + gradient_limit = self.config['train']['gradient_limit'] + if gradient_limit > 0.0: + gradients, _ = tf.clip_by_global_norm(gradients, gradient_limit, use_norm=initial_gradients_norm) + clipped_gradients_norm = tf.global_norm(gradients) + initial_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Initial', initial_gradients_norm) + clipped_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Clipped', clipped_gradients_norm) + self.train_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary] + self.test_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary] + + return optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step_var) + + def _train_batch(self, train_batch, train_status_batch, session): + batch_start_joints, batch_actions, batch_images, _, _ = train_batch + train_feed = self.network.make_feed(batch_start_joints, batch_actions, batch_images) + train_feed[self.status_input] = np.array(train_status_batch) + train_summary, self.global_step, _, _ = session.run( + [self.train_board.summaries, self.global_step_var, self.optimizer, self.test_measures], + train_feed) + self.train_board.writer.add_summary(train_summary, self.global_step) + + def _test_batch(self, test_batch, test_status_batch, session): + batch_start_joints, batch_actions, batch_images, _, _ = test_batch + test_feed = self.network.make_feed(batch_start_joints, batch_actions, batch_images) + test_feed[self.status_input] = np.array(test_status_batch) + test_summary = session.run( + [self.test_board.summaries] + self.test_measures, + test_feed)[0] + self.test_board.writer.add_summary(test_summary, self.global_step) + 
self.test_board.writer.flush() + + def train(self, train_data, test_data, image_cache, session): + session.run(tf.global_variables_initializer()) + session.run(tf.local_variables_initializer()) + + test_every_batches = self.config['train']['test_every_batches'] + + total_train_batches = 0 + for epoch in range(self.epochs): + + train_batch_count = 1 + for train_batch in train_data: + + train_batch, train_status_batch = get_batch_and_labels(train_batch, image_cache) + # assert if train_status contains goal status + assert(np.all(np.array(train_status_batch) != reward_data_manager.GOAL_STATUS)) + + self._train_batch(train_batch, train_status_batch, session) + print("Finished epoch %d/%d batch %d/%d" % (epoch+1, self.epochs, train_batch_count, total_train_batches)) + train_batch_count += 1 + + if train_batch_count % test_every_batches == 0: + test_batch = next(test_data.__iter__()) # random test batch + test_batch, test_status_batch = get_batch_and_labels(test_batch, image_cache) + self._test_batch(test_batch, test_status_batch, session) + # save the model + self.network.save_weights(session, self.global_step) + + total_train_batches = train_batch_count - 1 + self.train_board.writer.flush() + + test_batch = next(test_data.__iter__()) # random test batch + test_batch, test_status_batch = get_batch_and_labels(test_batch, image_cache) + self._test_batch(test_batch, test_status_batch, session) + + # save the model + if epoch == self.epochs - 1 or epoch % self.save_every_epochs == self.save_every_epochs - 1: + self.network.save_weights(session, self.global_step) + + print('done epoch {} of {}, global step {}'.format(epoch, self.epochs, self.global_step)) + + class TensorBoard: + + def __init__(self, tensorboard_path, board_name, summaries): + self.writer = tf.summary.FileWriter(os.path.join(tensorboard_path, board_name)) + self.summaries = tf.summary.merge(summaries) + + +if __name__ == '__main__': + # read the config + config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml') + with open(config_path, 'r') as yml_file: + config = yaml.load(yml_file) + print('------------ Config ------------') + print(yaml.dump(config)) + + model_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') + collision_model_name = "collision_simple_trained" + models_base_dir = os.path.join('data', 'reward', 'model') + collision_model = CollisionModel(model_name, config, models_base_dir, tensorboard_dir=models_base_dir) + + train_data, test_data = get_train_and_test_datasets(config, is_collision_model=True) + image_cache = get_image_cache(config) + + gpu_usage = config['general']['gpu_usage'] + session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_usage)) + with tf.Session(config=session_config) as session: + collision_model.train(train_data, test_data, image_cache, session) + train_data.stop() + test_data.stop() diff --git a/reward_collision_network.py b/reward_collision_network.py new file mode 100644 index 0000000..d2b33e9 --- /dev/null +++ b/reward_collision_network.py @@ -0,0 +1,107 @@ +import os +import tensorflow as tf +import tensorflow.contrib.layers as tf_layers +from dqn_model import DqnModel +from modeling_utils import get_activation +from resnet_model import ResNetModel + + +class CollisionNetwork: + + def __init__(self, config, model_dir): + self._reuse_flag = False + + self.config = config + self.is_vision_enabled = 'vision' in config['general']['scenario'] + self.image_network = config['network']['image_network'] + 
assert('dqn' == self.image_network or 'resnet' == self.image_network) + + self.joints_inputs = tf.placeholder(tf.float32, (None, 4), name='joints_inputs') + self.action_inputs = tf.placeholder(tf.float32, (None, 4), name='action_inputs') + + self.workspace_image_inputs, self.images_3d = None, None + if self.is_vision_enabled: + self.workspace_image_inputs = tf.placeholder(tf.float32, (None, 55, 111), name='workspace_image_inputs') + self.images_3d = tf.expand_dims(self.workspace_image_inputs, axis=-1) + + current_variables_count = len(tf.trainable_variables()) + self.status_softmax_logits = self._create_network(self.joints_inputs, self.action_inputs, self.images_3d) + self.collision_variables = tf.trainable_variables()[current_variables_count:] + + # model path to load + self.model_dir = model_dir + assert os.path.exists(self.model_dir) + self.saver_path = os.path.join(self.model_dir, "model_saver") + self.saver = tf.train.Saver(self.collision_variables, max_to_keep=4, save_relative_paths=True) + + def _next_state_model(self, joints_inputs, action_inputs): + # next step is a deterministic computation + action_step_size = self.config['openrave_rl']['action_step_size'] + step = action_inputs * action_step_size + unclipped_result = joints_inputs + step + # we initiate an openrave manager to get the robot, to get the joint bounds and the safety + joint_safety = 0.0001 + lower_bounds = [-2.617, -1.571, -1.571, -1.745, -2.617] + lower_bounds = [b + joint_safety for b in lower_bounds[1:]] + upper_bounds = [-b for b in lower_bounds] + + # clip the result + clipped_result = tf.maximum(unclipped_result, lower_bounds) + clipped_result = tf.minimum(clipped_result, upper_bounds) + return clipped_result, unclipped_result + + def _create_network(self, joints_inputs, action_inputs, images_3d): + name_prefix = 'reward' + + # get L2 regularization scale + l2_scale = 0.0 + if 'l2_regularization_coefficient' in self.config['train']: + l2_scale = self.config['train']['l2_regularization_coefficient'] + + # get the next joints + clipped_next_joints, unclipped_next_joints = self._next_state_model(joints_inputs, action_inputs) + current = tf.concat((joints_inputs, clipped_next_joints), axis=1) + + # add vision if needed + if self.is_vision_enabled: + if self.image_network == 'dqn': + visual_inputs = DqnModel(name_prefix, self.config).predict(images_3d, self._reuse_flag) + else: + num_of_residual_blocks = self.config['network']['resnet_num_of_residual_blocks'] + visual_inputs = ResNetModel(name_prefix, self.config).predict(images_3d, + num_of_residual_blocks, + self._reuse_flag) + current = tf.concat((current, visual_inputs), axis=1) + + layers = self.config['network']['layers'] + [2] + for i, layer_size in enumerate(layers): + _activation = get_activation(self.config['network']['activation']) if i < len(layers) - 1 else None + current = tf.layers.dense( + current, + layer_size, + activation=_activation, + name='{}_layers_{}'.format(name_prefix, i), + kernel_regularizer=tf_layers.l2_regularizer(l2_scale), + reuse=self._reuse_flag + ) + softmax_logits = current + + self._reuse_flag = True + return softmax_logits + + def save_weights(self, sess, global_step=None): + self.saver.save(sess, self.saver_path, global_step=global_step) + + def load_weights(self, sess): + self.saver.restore(sess, tf.train.latest_checkpoint(self.model_dir)) + + def make_feed(self, all_start_joints, all_actions, images=None): + feed = { + self.joints_inputs: all_start_joints, + self.action_inputs: all_actions, + } + if self.is_vision_enabled: 
+ assert images is not None + assert images[0] is not None + feed[self.workspace_image_inputs] = images + return feed diff --git a/reward_data_manager.py b/reward_data_manager.py new file mode 100644 index 0000000..1996a74 --- /dev/null +++ b/reward_data_manager.py @@ -0,0 +1,253 @@ +import time +import numpy as np +import random +import os +import multiprocessing +import Queue +import pickle +import bz2 +from image_cache import ImageCache + +FREE_TRANSITION_STATUS = 1 +COLLISION_STATUS = 2 +GOAL_STATUS = 3 + + +class UnzipperProcess(multiprocessing.Process): + def __init__(self, number_of_unzippers, files_queue, result_queue, unzipper_specific_queue): + multiprocessing.Process.__init__(self) + self.number_of_unzippers = number_of_unzippers + self.files_queue = files_queue + self.result_queue = result_queue + self.unzipper_specific_queue = unzipper_specific_queue + + def run(self): + while True: + try: + if self.result_queue.empty(): + next_file = self.files_queue.get(block=True, timeout=1) + with open(next_file, 'r') as source_file: + result = pickle.load(source_file) + self.result_queue.put(result) + self.result_queue.task_done() + else: + time.sleep(1) + except Queue.Empty: + time.sleep(1) + pass + try: + # only option is to break + task_type = self.unzipper_specific_queue.get(block=True, timeout=0.001) + break + except Queue.Empty: + pass + + +class UnzipperIterator: + def __init__(self, number_of_unzippers, files): + self.number_of_unzippers = number_of_unzippers + self.files = files + + self.unzipper_specific_queues = [multiprocessing.JoinableQueue() for _ in range(number_of_unzippers)] + self.files_queue = multiprocessing.JoinableQueue() + self.results_queue = multiprocessing.JoinableQueue() + + self.unzippers = [UnzipperProcess( + number_of_unzippers, self.files_queue, self.results_queue, self.unzipper_specific_queues[i]) + for i in range(number_of_unzippers)] + + for u in self.unzippers: + u.start() + + def __iter__(self): + random.shuffle(self.files) + # put all the files + for f in self.files: + self.files_queue.put(f) + # when ready - collect the zipped files + for i in range(len(self.files)): + unzipped = self.results_queue.get() + yield unzipped + + def end(self): + for u in self.unzippers: + u.terminate() + time.sleep(10) + + +class RewardDataLoader: + def __init__(self, data_dir, status_to_read, number_of_unzippers=None): + assert os.path.exists(data_dir) + self.cache_dir = data_dir.replace('supervised_data', 'supervised_data_cache') + if not os.path.exists(self.cache_dir): + self._create_cache(data_dir, self.cache_dir) + + self.files = [os.path.join(self.cache_dir, f) + for f in os.listdir(self.cache_dir) + if f.endswith(".pkl") and f.startswith('{}_'.format(status_to_read))] + + self.files_iterator = None + if number_of_unzippers is not None: + self.files_iterator = UnzipperIterator(number_of_unzippers, self.files) + + @staticmethod + def _create_cache(data_dir, cache_dir): + print('creating cache for {} in {}'.format(data_dir, cache_dir)) + os.makedirs(cache_dir) + files = [f for f in os.listdir(data_dir) if f.endswith(".pkl")] + for f in files: + destination_file = os.path.join(cache_dir, f) + print('caching {}'.format(f)) + with bz2.BZ2File(os.path.join(data_dir, f), 'r') as compressed_file: + data = pickle.load(compressed_file) + with open(destination_file, 'w') as cache_file: + pickle.dump(data, cache_file) + print('done creating cache for {} in {}'.format(data_dir, cache_dir)) + + def __iter__(self): + random.shuffle(self.files) + if self.files_iterator is None: + 
for f in self.files: + with open(f, 'r') as source_file: + yield pickle.load(source_file) + else: + for content in self.files_iterator: + yield content + + def stop(self): + if self.files_iterator is not None: + self.files_iterator.end() + + +class Batcher: + def __init__(self, input_iterator, batch_size, shuffle_before_yield): + self.input_iterator = input_iterator + self.batch_size = batch_size + self.shuffle_before_yield = shuffle_before_yield + + def __iter__(self): + current_batch = [] + for tuple_list in self.input_iterator: + for t in tuple_list: + current_batch.append(t) + if len(current_batch) == self.batch_size: + if self.shuffle_before_yield: + random.shuffle(current_batch) + yield current_batch + current_batch = [] + if self.shuffle_before_yield: + random.shuffle(current_batch) + yield current_batch + + +class Oversampler: + def __init__(self, data_dir, free_class_batch_size, oversample_goal, oversample_collision, + shuffle_batch_multiplier=2, number_of_unzippers=None): + self.data_dir = data_dir + self.oversample_goal = oversample_goal + self.oversample_collision = oversample_collision + + # load data + self.all_collisions = self._load_all(RewardDataLoader(data_dir, 2, None)) + self.all_goals = self._load_all(RewardDataLoader(data_dir, 3, None)) + self.free_transitions_iterator = RewardDataLoader(data_dir, 1, number_of_unzippers=number_of_unzippers) + # self._describe_data(self.free_transitions_iterator) # TODO: UNCOMMENT THIS + + inner_batcher = Batcher(self.free_transitions_iterator, free_class_batch_size * shuffle_batch_multiplier, True) + self.batcher = Batcher(inner_batcher, free_class_batch_size, False) + + def _describe_data(self, free_transitions_iterator): + free_count = 0 + for tuple_list in free_transitions_iterator: + free_count += len(tuple_list) + collision_count = len(self.all_collisions) + goal_count = len(self.all_goals) + all_count = free_count + collision_count + goal_count + + print('data dir: {}'.format(self.data_dir)) + print('free: {} ({})'.format(free_count, float(free_count) / all_count)) + print('collision: {} ({})'.format(collision_count, float(collision_count) / all_count)) + print('goal: {} ({})'.format(goal_count, float(goal_count) / all_count)) + print('') + + @staticmethod + def _load_all(files_iterator): + all_transitions = [] + for tuple_list in files_iterator: + all_transitions.extend(tuple_list) + return all_transitions + + def _oversample_result(self, free_transition_batch): + batch_size = len(free_transition_batch) + goal_sample_size = int(self.oversample_goal * batch_size) + goal_indices = list(np.random.choice(len(self.all_goals), goal_sample_size)) + goal_current_batch = [self.all_goals[i] for i in goal_indices] + collision_sample_size = int(self.oversample_collision * batch_size) + collision_indices = list(np.random.choice(len(self.all_collisions), collision_sample_size)) + collision_current_batch = [self.all_collisions[i] for i in collision_indices] + return free_transition_batch + goal_current_batch + collision_current_batch + + def __iter__(self): + for free_transition_batch in self.batcher: + yield self._oversample_result(free_transition_batch) + + def stop(self): + self.free_transitions_iterator.stop() + + +def get_image_cache(config): + image_cache = None + scenario = config['general']['scenario'] + if 'vision' in scenario: + print("Loading image cache") + params_dir = os.path.abspath(os.path.join(os.getcwd(), "scenario_params", scenario)) + image_cache = ImageCache(params_dir) + return image_cache + + +def 
get_train_and_test_datasets(config, is_collision_model=False): + number_of_unzippers = config['general']['number_of_unzippers'] + batch_size = config['train']['batch_size'] + if is_collision_model: + oversample_goal = 0 + else: + oversample_goal = config['train']['oversample_goal'] + oversample_collision = config['train']['oversample_collision'] + scenario = config['general']['scenario'] + base_data_dir = os.path.join('data', 'supervised_data', scenario + '_by_status') + train_data_dir = os.path.join(base_data_dir, 'train') + test_data_dir = os.path.join(base_data_dir, 'test') + train = Oversampler(train_data_dir, batch_size, oversample_goal, oversample_collision, + number_of_unzippers=number_of_unzippers) + print("Loaded Train") + test = Oversampler(test_data_dir, batch_size, oversample_goal, oversample_collision, + number_of_unzippers=number_of_unzippers) + print("Loaded Test") + return train, test + + +def get_batch_and_labels(batch, image_cache): + all_start_joints = [] + all_actions = [] + all_status = [] + all_next_joints = [] + all_goal_joints = [] + all_images = None + if image_cache is not None: + all_images = [] + for i in range(len(batch)): + if image_cache is None: + workspace_id = None + start_joints, goal_joints, action, next_joints, reward, terminated, status = batch[i] + else: + workspace_id, start_joints, goal_joints, action, next_joints, reward, terminated, status = batch[i] + all_start_joints.append(start_joints[1:]) + all_actions.append(action[1:]) + all_next_joints.append(next_joints[1:]) + all_goal_joints.append(goal_joints[1:]) + all_status.append(status) + if image_cache is not None: + workspace_id = workspace_id.split(".")[0]+".pkl" + image = image_cache.items[workspace_id].np_array + all_images.append(image) + return [all_start_joints, all_actions, all_images, all_next_joints, all_goal_joints], all_status diff --git a/reward_model.py b/reward_model.py new file mode 100644 index 0000000..17aabab --- /dev/null +++ b/reward_model.py @@ -0,0 +1,146 @@ +from reward_collision_model import CollisionModel +import reward_data_manager +from reward_data_manager import get_train_and_test_datasets, get_batch_and_labels, get_image_cache +import time +import datetime +import numpy as np +import os +import yaml +import tensorflow as tf + + +class RewardModel: + + def __init__(self, model_name, config, models_base_dir, tensorboard_dir, trained_collision_model): + + self.model_name = model_name + self.config = config + + self.model_dir = os.path.join(models_base_dir, self.model_name) + if not os.path.exists(self.model_dir): + os.makedirs(self.model_dir) + + self.train_summaries = [] + self.test_summaries = [] + + self.collision_model = trained_collision_model + self.collision_prob = self.collision_model.collision_prob + + self.next_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='next_joints_inputs') + self.goal_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='goal_joints_inputs') + + self.states_probabilities = self.create_reward_logic(self.next_joints_inputs, self.goal_joints_inputs, self.collision_prob) + + self.status_input = tf.placeholder(tf.int32, [None, ]) + self.prediction = tf.argmax(self.states_probabilities, axis=1, output_type=tf.int32) + + self.test_measures = self.add_test_measures() + + with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd: + yaml.dump(config, fd) + + self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries) + + def predict(self, data_batch, session): + feed = 
diff --git a/reward_model.py b/reward_model.py
new file mode 100644
index 0000000..17aabab
--- /dev/null
+++ b/reward_model.py
@@ -0,0 +1,146 @@
+from reward_collision_model import CollisionModel
+import reward_data_manager
+from reward_data_manager import get_train_and_test_datasets, get_batch_and_labels, get_image_cache
+import time
+import datetime
+import numpy as np
+import os
+import yaml
+import tensorflow as tf
+
+
+class RewardModel:
+
+    def __init__(self, model_name, config, models_base_dir, tensorboard_dir, trained_collision_model):
+
+        self.model_name = model_name
+        self.config = config
+
+        self.model_dir = os.path.join(models_base_dir, self.model_name)
+        if not os.path.exists(self.model_dir):
+            os.makedirs(self.model_dir)
+
+        self.train_summaries = []
+        self.test_summaries = []
+
+        self.collision_model = trained_collision_model
+        self.collision_prob = self.collision_model.collision_prob
+
+        self.next_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='next_joints_inputs')
+        self.goal_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='goal_joints_inputs')
+
+        self.states_probabilities = self.create_reward_logic(self.next_joints_inputs, self.goal_joints_inputs, self.collision_prob)
+
+        self.status_input = tf.placeholder(tf.int32, [None, ])
+        self.prediction = tf.argmax(self.states_probabilities, axis=1, output_type=tf.int32)
+
+        self.test_measures = self.add_test_measures()
+
+        with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd:
+            yaml.dump(config, fd)
+
+        self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries)
+
+    def predict(self, data_batch, session):
+        feed = self.collision_model.make_feed(data_batch)
+        return session.run([self.prediction], feed)[0]
+
+    def create_reward_logic(self, next_joints, goal_joints, collision_prob):
+        # epsilon: distance under which a configuration counts as "at the goal"
+        goal_sensitivity = self.config['model']['goal_configuration_distance_sensitivity']
+
+        # alpha: width of the margin over which "close to goal" decays from 1 to 0
+        alpha = 0.4
+
+        # euclidean distance between the next configuration and the goal
+        self.delta = tf.norm(next_joints - goal_joints, ord='euclidean', axis=1)
+
+        # 0 while delta <= epsilon, then grows linearly with delta
+        self.a = tf.maximum(self.delta - goal_sensitivity, 0)
+
+        # capped at alpha, so:
+        #   delta <= epsilon         -> b = 0
+        #   delta >= epsilon + alpha -> b = alpha
+        #   in between               -> linear
+        self.b = tf.minimum(self.a, alpha)
+
+        # e.g. with epsilon = 1.23 (the configured sensitivity) and alpha = 0.4:
+        # delta = 1.0 -> 1.0, delta = 1.43 -> 0.5, delta >= 1.63 -> 0.0
+        self.is_close_to_goal = 1 - (self.b / alpha)
+
+        self.is_close_to_goal = tf.expand_dims(self.is_close_to_goal, -1)
+
+        # the three class probabilities: a transition reaches the goal only if
+        # it is collision free and close to the goal
+        p_goal = (1 - collision_prob) * self.is_close_to_goal
+        p_free_space = (1 - collision_prob) * (1 - self.is_close_to_goal)
+
+        return tf.concat((p_free_space, collision_prob, p_goal), axis=1)
+
+    def add_test_measures(self):
+        labels = self.status_input - 1
+        accuracy = tf.reduce_mean(tf.cast(tf.equal(labels, self.prediction), tf.float32))
+        # TP = tf.count_nonzero((self.prediction) * (labels), dtype=tf.float32) + tf.Variable(0.001)
+        # TN = tf.count_nonzero((self.prediction - 1) * (labels - 1), dtype=tf.float32) + tf.Variable(0.001)
+        # FP = tf.count_nonzero(self.prediction * (labels - 1), dtype=tf.float32) + tf.Variable(0.001)
+        # FN = tf.count_nonzero((self.prediction - 1) * labels, dtype=tf.float32) + tf.Variable(0.001)
+        # precision = TP / (TP + FP)
+        # recall = TP / (TP + FN)
+        accuracy_summary = tf.summary.scalar('Accuracy', accuracy)
+        # recall_summary = tf.summary.scalar('Recall', recall)
+        # precision_summary = tf.summary.scalar('Precision', precision)
+        self.test_summaries += [accuracy_summary]
+        return [accuracy]
+
+    def _test_batch(self, test_batch, test_status_batch, session):
+        batch_start_joints, batch_actions, batch_images, batch_next_joints, batch_goal_joints = test_batch
+        test_feed = self.collision_model.make_feed((batch_start_joints, batch_actions, batch_images))
+        test_feed[self.status_input] = np.array(test_status_batch)
+        test_feed[self.next_joints_inputs] = batch_next_joints
+        test_feed[self.goal_joints_inputs] = batch_goal_joints
+        # compute summaries and diagnostics in a single pass over the graph
+        test_summary, measures, prediction, labels, probabilities = session.run(
+            [self.test_board.summaries, self.test_measures, self.prediction,
+             self.status_input - 1, self.states_probabilities],
+            test_feed)
+        print((measures, prediction, labels, probabilities))
+        self.test_board.writer.add_summary(test_summary)
+        self.test_board.writer.flush()
+
+    def test(self, test_data, image_cache, session):
+        session.run(tf.global_variables_initializer())
+        session.run(tf.local_variables_initializer())
+
+        self.collision_model.load(session)
+
+        test_batch = next(iter(test_data))  # random test batch
+        test_batch, test_status_batch = get_batch_and_labels(test_batch, image_cache)
+        self._test_batch(test_batch, test_status_batch, session)
+
+    class TensorBoard:
+
+        def __init__(self, tensorboard_path, board_name, summaries):
+            self.writer = tf.summary.FileWriter(os.path.join(tensorboard_path, board_name))
+            self.summaries = tf.summary.merge(summaries)
+
+
+if __name__ == '__main__':
+    # read the config
+    config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml')
+    with open(config_path, 'r') as yml_file:
+        config = yaml.load(yml_file)
print('------------ Config ------------') + print(yaml.dump(config)) + + models_base_dir = os.path.join('data', 'reward', 'model') + collision_model_name = "2019_08_31_09_14_58-collision-resnet-vision" + collision_model = CollisionModel(collision_model_name, config, models_base_dir, tensorboard_dir=models_base_dir) + + model_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') + reward_model = RewardModel(model_name, config, models_base_dir, models_base_dir, collision_model) + + train_data, test_data = get_train_and_test_datasets(config, is_collision_model=False) + image_cache = get_image_cache(config) + + gpu_usage = config['general']['gpu_usage'] + session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_usage)) + with tf.Session(config=session_config) as session: + reward_model.test(test_data, image_cache, session) + train_data.stop() + test_data.stop() diff --git a/train_supervised_reward.py b/train_supervised_reward.py deleted file mode 100644 index ba423a8..0000000 --- a/train_supervised_reward.py +++ /dev/null @@ -1,385 +0,0 @@ -from image_cache import ImageCache -from pre_trained_reward import * -from openrave_manager import OpenraveManager -from potential_point import PotentialPoint - -import time -import datetime -import numpy as np -import random -import os -import yaml -import tensorflow as tf -import multiprocessing -import Queue - - -class UnzipperProcess(multiprocessing.Process): - def __init__(self, number_of_unzippers, files_queue, result_queue, unzipper_specific_queue): - multiprocessing.Process.__init__(self) - self.number_of_unzippers = number_of_unzippers - self.files_queue = files_queue - self.result_queue = result_queue - self.unzipper_specific_queue = unzipper_specific_queue - - def run(self): - while True: - try: - if self.result_queue.qsize() < self.number_of_unzippers: - next_file = self.files_queue.get(block=True, timeout=1) - with open(next_file, 'r') as source_file: - result = pickle.load(source_file) - self.result_queue.put(result) - self.result_queue.task_done() - else: - time.sleep(1) - except Queue.Empty: - pass - try: - # only option is to break - task_type = self.unzipper_specific_queue.get(block=True, timeout=0.001) - break - except Queue.Empty: - pass - - -class UnzipperIterator: - def __init__(self, number_of_unzippers, files): - self.number_of_unzippers = number_of_unzippers - self.files = files - - self.unzipper_specific_queues = [multiprocessing.JoinableQueue() for _ in range(number_of_unzippers)] - self.files_queue = multiprocessing.JoinableQueue() - self.results_queue = multiprocessing.JoinableQueue() - - self.unzippers = [UnzipperProcess( - number_of_unzippers, self.files_queue, self.results_queue, self.unzipper_specific_queues[i]) - for i in range(number_of_unzippers)] - - for u in self.unzippers: - u.start() - - def __iter__(self): - random.shuffle(self.files) - # put all the files - for f in self.files: - self.files_queue.put(f) - # when ready - collect the zipped files - for i in range(len(self.files)): - unzipped = self.results_queue.get() - yield unzipped - - def end(self): - for u in self.unzippers: - u.terminate() - time.sleep(10) - - -class RewardDataLoader: - def __init__(self, data_dir, status_to_read, number_of_unzippers=None): - assert os.path.exists(data_dir) - cache_dir = data_dir.replace('supervised_data', 'supervised_data_cache') - if not os.path.exists(cache_dir): - self._create_cache(data_dir, cache_dir) - - files = [f for f in os.listdir(data_dir) if 
f.endswith(".pkl") and f.startswith('{}_'.format(status_to_read))] - assert len(files) > 0 - self.cache_dir = cache_dir - self.files = [os.path.join(self.cache_dir, f) for f in files] - - self.files_iterator = None - if number_of_unzippers is not None: - self.files_iterator = UnzipperIterator(number_of_unzippers, self.files) - - @staticmethod - def _create_cache(data_dir, cache_dir): - print 'creating cache for {} in {}'.format(data_dir, cache_dir) - os.makedirs(cache_dir) - files = [f for f in os.listdir(data_dir) if f.endswith(".pkl")] - for f in files: - destination_file = os.path.join(cache_dir, f) - print 'caching {}'.format(f) - with bz2.BZ2File(os.path.join(data_dir, f), 'r') as compressed_file: - data = pickle.load(compressed_file) - with open(destination_file, 'w') as cache_file: - pickle.dump(data, cache_file) - print 'done creating cache for {} in {}'.format(data_dir, cache_dir) - - def __iter__(self): - random.shuffle(self.files) - if self.files_iterator is None: - for f in self.files: - with open(f, 'r') as source_file: - yield pickle.load(source_file) - else: - for content in self.files_iterator: - yield content - - def stop(self): - if self.files_iterator is not None: - self.files_iterator.end() - - -class Batcher: - def __init__(self, input_iterator, batch_size, shuffle_before_yield): - self.input_iterator = input_iterator - self.batch_size = batch_size - self.shuffle_before_yield = shuffle_before_yield - - def __iter__(self): - current_batch = [] - for tuple_list in self.input_iterator: - for t in tuple_list: - current_batch.append(t) - if len(current_batch) == self.batch_size: - if self.shuffle_before_yield: - random.shuffle(current_batch) - yield current_batch - current_batch = [] - if self.shuffle_before_yield: - random.shuffle(current_batch) - yield current_batch - - -class Oversampler: - def __init__(self, data_dir, free_class_batch_size, oversample_goal, oversample_collision, - shuffle_batch_multiplier=2, number_of_unzippers=None): - self.data_dir = data_dir - self.oversample_goal = oversample_goal - self.oversample_collision = oversample_collision - - # load data - self.all_collisions = self._load_all(RewardDataLoader(data_dir, 2, None)) - self.all_goals = self._load_all(RewardDataLoader(data_dir, 3, None)) - self.free_transitions_iterator = RewardDataLoader(data_dir, 1, number_of_unzippers=number_of_unzippers) - self._describe_data(self.free_transitions_iterator) - - inner_batcher = Batcher(self.free_transitions_iterator, free_class_batch_size * shuffle_batch_multiplier, True) - self.batcher = Batcher(inner_batcher, free_class_batch_size, False) - - def _describe_data(self, free_transitions_iterator): - free_count = 0 - for tuple_list in free_transitions_iterator: - free_count += len(tuple_list) - collision_count = len(self.all_collisions) - goal_count = len(self.all_goals) - all_count = free_count + collision_count + goal_count - - print 'data dir: {}'.format(self.data_dir) - print 'free: {} ({})'.format(free_count, float(free_count) / all_count) - print 'collision: {} ({})'.format(collision_count, float(collision_count) / all_count) - print 'goal: {} ({})'.format(goal_count, float(goal_count) / all_count) - print '' - - @staticmethod - def _load_all(files_iterator): - all_transitions = [] - for tuple_list in files_iterator: - all_transitions.extend(tuple_list) - return all_transitions - - def _oversample_result(self, free_transition_batch): - batch_size = len(free_transition_batch) - goal_sample_size = int(self.oversample_goal * batch_size) - goal_indices = 
list(np.random.choice(len(self.all_goals), goal_sample_size)) - goal_current_batch = [self.all_goals[i] for i in goal_indices] - collision_sample_size = int(self.oversample_collision * batch_size) - collision_indices = list(np.random.choice(len(self.all_collisions), collision_sample_size)) - collision_current_batch = [self.all_collisions[i] for i in collision_indices] - return free_transition_batch + goal_current_batch + collision_current_batch - - def __iter__(self): - for free_transition_batch in self.batcher: - yield self._oversample_result(free_transition_batch) - - def stop(self): - self.free_transitions_iterator.stop() - - -model_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') - -# read the config -config_path = os.path.join(os.getcwd(), 'config/reward_config.yml') -with open(config_path, 'r') as yml_file: - config = yaml.load(yml_file) - print('------------ Config ------------') - print(yaml.dump(config)) - - -epochs = config['general']['epochs'] -save_every_epochs = config['general']['save_every_epochs'] -batch_size = config['model']['batch_size'] - -initial_learn_rate = config['reward']['initial_learn_rate'] -decrease_learn_rate_after = config['reward']['decrease_learn_rate_after'] -learn_rate_decrease_rate = config['reward']['learn_rate_decrease_rate'] -oversample_goal = config['reward']['oversample_goal'] -oversample_collision = config['reward']['oversample_collision'] - -scenario = config['general']['scenario'] -# base_data_dir = os.path.join('supervised_data', scenario) -base_data_dir = os.path.join('supervised_data', scenario + '_by_status') -image_cache = None -if 'vision' in scenario: - params_dir = os.path.abspath(os.path.expanduser('~/ModelBasedDDPG/scenario_params/{}/'.format(scenario))) - image_cache = ImageCache(params_dir) -train_data_dir = os.path.join(base_data_dir, 'train') -test_data_dir = os.path.join(base_data_dir, 'test') - - -number_of_unzippers = config['general']['number_of_unzippers'] - -train = Oversampler(train_data_dir, batch_size, oversample_goal, oversample_collision, - number_of_unzippers=number_of_unzippers) -test = Oversampler(test_data_dir, batch_size, oversample_goal, oversample_collision, - number_of_unzippers=number_of_unzippers) - -# get openrave manager -openrave_manager = OpenraveManager(0.001, PotentialPoint.from_config(config)) - -# set summaries and saver dir -summaries_dir = os.path.join('reward', 'tensorboard') -train_summary_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'train_' + model_name)) -test_summary_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'test_' + model_name)) -saver_dir = os.path.join('reward', 'model', model_name) -if not os.path.exists(saver_dir): - os.makedirs(saver_dir) - -# save the config -config_copy_path = os.path.join(saver_dir, 'config.yml') -yaml.dump(config, open(config_copy_path, 'w')) - -# create the network -pre_trained_reward = PreTrainedReward(model_name, config) -reward_prediction = pre_trained_reward.reward_prediction -reward_input = tf.placeholder(tf.float32, [None, 1]) -# reward_loss = tf.reduce_mean(tf.square(reward_input - reward_prediction)) -reward_loss = tf.losses.mean_squared_error(labels=reward_input, predictions=reward_prediction) -status_prediction = pre_trained_reward.status_softmax_logits -status_input = tf.placeholder(tf.int32, [None, ]) -# status_loss = tf.reduce_mean( -# tf.nn.sparse_softmax_cross_entropy_with_logits(labels=status_input-1, logits=status_prediction) -# ) * config['reward']['cross_entropy_coefficient'] 
-status_loss = tf.losses.sparse_softmax_cross_entropy(labels=status_input-1, logits=status_prediction) * config[ - 'reward']['cross_entropy_coefficient'] -regularization_loss = tf.losses.get_regularization_loss() -total_loss = reward_loss + status_loss + regularization_loss - - -global_step = tf.Variable(0, trainable=False) -learning_rate = tf.train.exponential_decay( - initial_learn_rate, global_step, decrease_learn_rate_after, learn_rate_decrease_rate, staircase=True) -optimizer = tf.train.AdamOptimizer(learning_rate) -gradients, variables = zip(*optimizer.compute_gradients(total_loss, tf.trainable_variables())) -initial_gradients_norm = tf.global_norm(gradients) -gradient_limit = config['reward']['gradient_limit'] -if gradient_limit > 0.0: - gradients, _ = tf.clip_by_global_norm(gradients, gradient_limit, use_norm=initial_gradients_norm) -clipped_gradients_norm = tf.global_norm(gradients) -optimize_op = optimizer.apply_gradients(zip(gradients, variables), global_step=global_step) - -reward_loss_summary = tf.summary.scalar('reward_loss', reward_loss) -status_loss_summary = tf.summary.scalar('status_loss', status_loss) -regularization_loss_summary = tf.summary.scalar('regularization_loss_summary', regularization_loss) -total_loss_summary = tf.summary.scalar('total_loss_summary', total_loss) - -# summaries for the reward optimization -train_optimization_summaries = tf.summary.merge([ - reward_loss_summary, - status_loss_summary, - regularization_loss_summary, - total_loss_summary, - tf.summary.scalar('gradients_norm_initial', initial_gradients_norm), - tf.summary.scalar('gradients_norm_clipped', clipped_gradients_norm), - tf.summary.scalar('learn_rate', learning_rate) -]) - -test_optimization_summaries = tf.summary.merge([ - reward_loss_summary, - status_loss_summary, - regularization_loss_summary, - total_loss_summary, -]) - -goal_mean_rewards_error_input = tf.placeholder(tf.float32, name='goal_mean_rewards_error_input') -collision_mean_rewards_error_input = tf.placeholder(tf.float32, name='collision_mean_rewards_error_input') -other_mean_rewards_error_input = tf.placeholder(tf.float32, name='other_mean_rewards_error_input') -goal_max_rewards_error_input = tf.placeholder(tf.float32, name='goal_max_rewards_error_input') -collision_max_rewards_error_input = tf.placeholder(tf.float32, name='collision_max_rewards_error_input') -other_max_rewards_error_input = tf.placeholder(tf.float32, name='other_max_rewards_error_input') -goal_accuracy_input = tf.placeholder(tf.float32, name='goal_accuracy_input') -collision_accuracy_input = tf.placeholder(tf.float32, name='collision_accuracy_input') -other_accuracy_input = tf.placeholder(tf.float32, name='other_accuracy_input') - -test_summaries = tf.summary.merge([ - tf.summary.scalar('goal_mean_rewards_error', goal_mean_rewards_error_input), - tf.summary.scalar('collision_mean_rewards_error', collision_mean_rewards_error_input), - tf.summary.scalar('other_mean_rewards_error', other_mean_rewards_error_input), - tf.summary.scalar('goal_max_rewards_error', goal_max_rewards_error_input), - tf.summary.scalar('collision_max_rewards_error', collision_max_rewards_error_input), - tf.summary.scalar('other_max_rewards_error', other_max_rewards_error_input), - tf.summary.scalar('goal_accuracy', goal_accuracy_input), - tf.summary.scalar('collision_accuracy', collision_accuracy_input), - tf.summary.scalar('other_accuracy', other_accuracy_input), -]) - -with tf.Session( - config=tf.ConfigProto( - 
gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=config['general']['gpu_usage'])
-    )
-) as sess:
-    sess.run(tf.global_variables_initializer())
-    current_global_step = 0
-    for epoch in range(epochs):
-        # run train for one epoch
-        for train_batch in train:
-            train_batch, train_rewards, train_status = get_batch_and_labels(train_batch, openrave_manager, image_cache)
-            train_status_one_hot = np.zeros((len(train_rewards), 3), dtype=np.float32)
-            train_status_one_hot[np.arange(len(train_rewards)), np.array(train_status)-1] = 1.0
-            train_feed = pre_trained_reward.make_feed(*train_batch, all_transition_labels=train_status_one_hot)
-            train_feed[reward_input] = np.expand_dims(np.array(train_rewards), axis=1)
-            train_feed[status_input] = np.array(train_status)
-            train_total_loss, train_summary, current_global_step, _ = sess.run(
-                [total_loss, train_optimization_summaries, global_step, optimize_op], train_feed)
-            # if np.isnan(train_loss):
-            #     for b in train_batch:
-            #         print b
-            #     print train_rewards
-            #     exit()
-            train_summary_writer.add_summary(train_summary, current_global_step)
-            train_summary_writer.flush()
-
-        if current_global_step > 0:
-            # run test for one (random) batch
-            test_batch = None
-            for test_batch in test:
-                break
-            test_batch, test_rewards, test_status = get_batch_and_labels(test_batch, openrave_manager, image_cache)
-            test_feed = pre_trained_reward.make_feed(*test_batch)
-            test_feed[reward_input] = np.expand_dims(np.array(test_rewards), axis=1)
-            test_feed[status_input] = np.array(test_status)
-            test_reward_prediction, test_status_prediction, test_total_loss, test_summary = sess.run(
-                [reward_prediction, status_prediction, total_loss, test_optimization_summaries], test_feed)
-            test_summary_writer.add_summary(test_summary, current_global_step)
-            # see what happens for different reward classes:
-            goal_stats, collision_stats, other_stats = compute_stats_per_class(
-                test_status, test_rewards, test_status_prediction, test_reward_prediction)
-            test_summary_writer.add_summary(sess.run(test_summaries, {
-                goal_mean_rewards_error_input: goal_stats[0],
-                collision_mean_rewards_error_input: collision_stats[0],
-                other_mean_rewards_error_input: other_stats[0],
-                goal_max_rewards_error_input: goal_stats[1],
-                collision_max_rewards_error_input: collision_stats[1],
-                other_max_rewards_error_input: other_stats[1],
-                goal_accuracy_input: goal_stats[2],
-                collision_accuracy_input: collision_stats[2],
-                other_accuracy_input: other_stats[2],
-            }), current_global_step)
-            test_summary_writer.flush()
-        # save the model
-        if epoch % save_every_epochs == save_every_epochs-1:
-            pre_trained_reward.saver.save(sess, os.path.join(saver_dir, 'reward'), global_step=current_global_step)
-        print 'done epoch {} of {}, global step {}'.format(epoch, epochs, current_global_step)
-
-train.stop()
-test.stop()
diff --git a/vae_network.py b/vae_network.py
new file mode 100644
index 0000000..edc1908
--- /dev/null
+++ b/vae_network.py
@@ -0,0 +1,161 @@
+import tensorflow as tf
+from coordnet_model import coord_conv
+
+
+def get_flatten_shape(multi_dim_shape):
+    # number of features per example once the leading batch dimension is dropped
+    flat_shape = 1
+    for dim in multi_dim_shape.as_list()[1:]:
+        flat_shape *= dim
+    return flat_shape
+
+
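+# A quick sanity check of get_flatten_shape (illustrative only):
+#
+#     x = tf.placeholder(tf.float32, (None, 55, 111, 1))
+#     get_flatten_shape(x.shape)  # -> 55 * 111 * 1 = 6105
+
+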
+class VAENetwork:
+    def __init__(self, config, model_dir, input_shape):
+        self.prefix = 'vae'
+        self.config = config
+        # the VAE hyper-parameters live under the 'network' section of
+        # reward_config.yml (there is no 'reward' section), so read them there
+        self.latent_dim = self.config['network']['vae_latent_dim']
+        self.input_shape = input_shape.as_list()
+
+        assert(self.input_shape[0] is None)
+        self.input_shape[0] = -1
+        self.flat_input_shape = get_flatten_shape(input_shape)
+
+        self.workspace_image_inputs = tf.placeholder(tf.float32, input_shape, name='workspace_image_inputs')
+        self.images_3d = tf.expand_dims(self.workspace_image_inputs, axis=-1)
+
+        self.encoded, self.mean, self.std_dev = self.encode(self.images_3d, reuse_flag=False)
+        self.decoded = self.decode(self.encoded, reuse_flag=False)
+        self.epochs = 10
+
+    def predict(self, workspace_image, reuse_flag):
+        return self.encode(workspace_image, reuse_flag)[0]
+
+    def encode(self, workspace_image, reuse_flag):
+        if self.config['network']['use_coordnet']:
+            print("Using Coordnet")
+            workspace_image = coord_conv(55, 111, False, workspace_image, 32, 8, 4, padding='same',
+                                         activation=tf.nn.relu, use_bias=True,
+                                         name='{}_conv1'.format(self.prefix), _reuse=reuse_flag)
+            workspace_image = tf.Print(workspace_image, [workspace_image], message="my workspace_image:")
+
+        conv2 = tf.layers.conv2d(workspace_image, 64, 4, 2, padding='same', activation=tf.nn.relu, use_bias=True,
+                                 name='{}_conv2'.format(self.prefix), reuse=reuse_flag)
+        conv2 = tf.Print(conv2, [conv2], message="my encoded_conv2:")
+
+        # every layer is named and takes reuse_flag so predict() can rebuild
+        # the encoder over the same variables
+        conv3 = tf.layers.conv2d(conv2, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True,
+                                 name='{}_conv3'.format(self.prefix), reuse=reuse_flag)
+        conv4 = tf.layers.conv2d(conv3, 32, 8, 4, padding='same', activation=tf.nn.relu, use_bias=True,
+                                 name='{}_conv4'.format(self.prefix), reuse=reuse_flag)
+        flat = tf.layers.flatten(conv4, name='{}_flat'.format(self.prefix))
+
+        # remember the pre-flatten shapes so decode() can mirror them
+        self.encode_last_conv_shape = conv4.shape.as_list()
+        self.encode_last_conv_shape[0] = -1
+        self.encode_after_conv_flat_shape = flat.shape.as_list()
+
+        # kept as attributes for debugging
+        self.conv2 = conv2
+        self.workspace_image = workspace_image
+
+        dense1 = tf.layers.dense(flat, 512, activation=tf.nn.relu, name='{}_dense1'.format(self.prefix), reuse=reuse_flag)
+        dense2 = tf.layers.dense(dense1, 512, activation=tf.nn.relu, name='{}_dense2'.format(self.prefix), reuse=reuse_flag)
+        dense3 = tf.layers.dense(dense2, 512, activation=tf.nn.relu, name='{}_dense3'.format(self.prefix), reuse=reuse_flag)
+
+        # local latent variables
+        mean_ = tf.layers.dense(dense3, units=self.latent_dim, name='mean', reuse=reuse_flag)
+        std_dev = tf.nn.softplus(tf.layers.dense(dense3, units=self.latent_dim, name='std_dev_dense', reuse=reuse_flag),
+                                 name='std_dev')  # softplus to force > 0
+        mean_ = tf.Print(mean_, [mean_], message="my mean_:")
+        std_dev = tf.Print(std_dev, [std_dev], message="my std_dev:")
+        # reparametrization trick: sample z = mean + eps * std with eps ~ N(0, I)
+        epsilon = tf.random_normal(tf.stack([tf.shape(dense3)[0], self.latent_dim]), name='epsilon')
+        z = mean_ + tf.multiply(epsilon, std_dev)
+
+        return z, mean_, std_dev
+
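+    # Why the reparametrization trick above: sampling z ~ N(mean, std) directly
+    # is not differentiable w.r.t. mean and std, but z = mean + eps * std with
+    # eps ~ N(0, I) is. A pure-NumPy sketch of the same step (toy numbers,
+    # illustration only):
+    #
+    #     import numpy as np
+    #     mean, std = np.array([0.5, -1.0]), np.array([0.1, 0.2])
+    #     eps = np.random.randn(2)      # suppose eps == [0.3, -0.8]
+    #     z = mean + eps * std          # -> [0.53, -1.16]
+    #     # gradients flow through mean and std; eps carries the randomness
+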
+    def decode(self, encoded_layer, reuse_flag):
+        decoded_dense1 = tf.layers.dense(encoded_layer, 512, activation=tf.nn.relu, name='{}_decoded_dense1'.format(self.prefix), reuse=reuse_flag)
+        decoded_dense2 = tf.layers.dense(decoded_dense1, 512, activation=tf.nn.relu, name='{}_decoded_dense2'.format(self.prefix), reuse=reuse_flag)
+        decoded_dense3 = tf.layers.dense(decoded_dense2, self.encode_after_conv_flat_shape[1], activation=tf.nn.relu, name='{}_decoded_dense3'.format(self.prefix), reuse=reuse_flag)
+        decoded_before_conv = tf.reshape(decoded_dense3, self.encode_last_conv_shape)
+
+        decoded_conv4 = tf.layers.conv2d_transpose(decoded_before_conv, 32, 8, 4, padding='same', activation=tf.nn.relu, use_bias=True,
+                                                   name='{}_decoded_conv4'.format(self.prefix), reuse=reuse_flag)
+        decoded_conv3 = tf.layers.conv2d_transpose(decoded_conv4, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True,
+                                                   name='{}_decoded_conv3'.format(self.prefix), reuse=reuse_flag)
+        # the final layer is linear; the sigmoid below maps its logits into
+        # (0, 1) so the cross-entropy reconstruction loss is well defined
+        # (relu followed by sigmoid would pin every output above 0.5)
+        decoded_conv2 = tf.layers.conv2d_transpose(decoded_conv3, 64, 4, 2, padding='same', activation=None, use_bias=True,
+                                                   name='{}_decoded_conv2'.format(self.prefix), reuse=reuse_flag)
+        decoded_conv2 = tf.Print(decoded_conv2, [decoded_conv2], message="my decoded_conv2:")
+        # crop back to the input resolution and squash to (0, 1)
+        img = tf.slice(decoded_conv2, [0, 0, 0, 0], [-1, 55, 111, 1])
+        img = tf.nn.sigmoid(img)
+        img = tf.Print(img, [img], message="my decoded_img:")
+
+        return img
+
+    def get_loss(self):
+        # reshape input and output to flat vectors
+        flat_output = tf.reshape(self.decoded, [-1, get_flatten_shape(self.decoded.shape)])
+        flat_input = tf.reshape(self.images_3d, [-1, get_flatten_shape(self.images_3d.shape)])
+        # clip away from exactly 0 and 1 so the logs below stay finite
+        flat_output = tf.clip_by_value(flat_output, 1e-4, 1.0 - 1e-4)
+        flat_output = tf.Print(flat_output, [flat_output], message="my flat_output:")
+        flat_input = tf.Print(flat_input, [flat_input], message="my flat_input:")
+        with tf.name_scope('loss'):
+            # per-pixel binary cross entropy between input and reconstruction
+            img_loss_vec = flat_input * -tf.log(flat_output) + (1 - flat_input) * -tf.log(1 - flat_output)
+            img_loss_vec = tf.Print(img_loss_vec, [img_loss_vec], message="my img_loss_vec:")
+            img_loss = tf.reduce_sum(img_loss_vec, 1)
+            img_loss = tf.Print(img_loss, [img_loss], message="my img_loss:")
+            # KL divergence between N(mean, std) and the N(0, I) prior
+            latent_loss = 0.5 * tf.reduce_sum(tf.square(self.mean) + tf.square(self.std_dev) - tf.log(tf.square(self.std_dev)) - 1, 1)
+            latent_loss = tf.Print(latent_loss, [latent_loss], message="my latent_loss:")
+            self.total_loss = tf.reduce_mean(img_loss + latent_loss)
+            self.latent_loss = tf.reduce_mean(latent_loss)
+            self.img_loss = tf.reduce_mean(img_loss)
+
+        return self.img_loss, self.latent_loss, self.total_loss
+
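+# Sanity check for the loss terms above (toy numbers, illustration only).
+# For a single latent unit, KL = 0.5 * (mean^2 + std^2 - log(std^2) - 1):
+#
+#     import numpy as np
+#     0.5 * (0.0 ** 2 + 1.0 ** 2 - np.log(1.0 ** 2) - 1)  # matches prior -> 0.0
+#     0.5 * (1.0 ** 2 + 1.0 ** 2 - np.log(1.0 ** 2) - 1)  # -> 0.5
+#     -np.log(0.9)  # one pixel, target 1.0, reconstruction 0.9 -> ~0.105
+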
+
+import yaml
+import os
+import datetime
+import time
+from reward_data_manager import get_image_cache
+
+if __name__ == "__main__":
+    # read the config
+    config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml')
+    with open(config_path, 'r') as yml_file:
+        config = yaml.load(yml_file)
+        print('------------ Config ------------')
+        print(yaml.dump(config))
+
+    model_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
+
+    image_cache = get_image_cache(config)
+    batch_size = 100
+    images_data = [image.np_array for image in image_cache.items.values()]
+    images_batch_data = [images_data[i:i + batch_size] for i in range(0, len(images_data), batch_size)]
+
+    train_data_count = int(len(images_batch_data) * 0.8)
+    train_data = images_batch_data[:train_data_count]
+    test_data = images_batch_data[train_data_count:]
+
+    workspace_image_inputs = tf.placeholder(tf.float32, (None, 55, 111), name='workspace_image_inputs')
+    # note the argument order: VAENetwork(config, model_dir, input_shape)
+    model = VAENetwork(config, model_name, workspace_image_inputs.shape)
+    with tf.Session() as session:
+        # NOTE: the training loop is assumed to live outside this file (e.g.
+        # in image_vae_model.VAEModel); VAENetwork itself only builds the graph
+        model.train(train_data, test_data, session)
diff --git a/workspace_generation_utils.py b/workspace_generation_utils.py
index 58e32fb..920a48b 100755
--- a/workspace_generation_utils.py
+++ b/workspace_generation_utils.py
@@ -193,7 +193,7 @@ def __init__(self, print_info=True, min_obstacles=1, max_obstacles=3, min_center
 
     def _print_variable(self, name, variable):
         if self.print_info:
-            print name, variable
+            print(name, variable)
 
     def _randomize_obstacle_parameters(self):
         ray_angle = uniform(WorkspaceGenerator.min_angle, WorkspaceGenerator.max_angle)
@@ -296,7 +296,7 @@ def plan_start_goal(self, slices, max_planner_iterations):
 
     def _print_variable(self, name, variable):
         if self.print_info:
-            print name, variable
+            print(name, variable)
 
     def _get_valid_joints(self, slices, forbidden_slice=None):
         while True: