diff --git a/coordnet_model.py b/coordnet_model.py new file mode 100644 index 0000000..260f3aa --- /dev/null +++ b/coordnet_model.py @@ -0,0 +1,61 @@ +import tensorflow as tf +from tensorflow.python.layers import base + + +class AddCoords(base.Layer): + """Add coords to a tensor""" + def __init__(self, x_dim=64, y_dim=64, with_r=False): + super(AddCoords, self).__init__() + self.x_dim = x_dim + self.y_dim = y_dim + self.with_r = with_r + + def call(self, input_tensor, **kwargs): + """ + input_tensor: (batch, x_dim, y_dim, c) + """ + batch_size_tensor = tf.shape(input_tensor)[0] + xx_ones = tf.ones([batch_size_tensor, self.x_dim], dtype=tf.int32) + xx_ones = tf.expand_dims(xx_ones, -1) + xx_range = tf.tile(tf.expand_dims(tf.range(self.y_dim), 0), + [batch_size_tensor, 1]) + xx_range = tf.expand_dims(xx_range, 1) + xx_channel = tf.matmul(xx_ones, xx_range) + xx_channel = tf.expand_dims(xx_channel, -1) + yy_ones = tf.ones([batch_size_tensor, self.y_dim], dtype=tf.int32) + yy_ones = tf.expand_dims(yy_ones, 1) + yy_range = tf.tile(tf.expand_dims(tf.range(self.x_dim), 0), + [batch_size_tensor, 1]) + yy_range = tf.expand_dims(yy_range, -1) + yy_channel = tf.matmul(yy_range, yy_ones) + yy_channel = tf.expand_dims(yy_channel, -1) + xx_channel = tf.cast(xx_channel, 'float32') / (self.x_dim - 1) + yy_channel = tf.cast(yy_channel, 'float32') / (self.y_dim - 1) + xx_channel = xx_channel*2 - 1 + yy_channel = yy_channel*2 - 1 + ret = tf.concat([input_tensor, xx_channel, yy_channel], axis=-1) + if self.with_r: + rr = tf.sqrt(tf.square(xx_channel) + tf.square(yy_channel)) + ret = tf.concat([ret, rr], axis=-1) + return ret + + +class CoordConv(base.Layer): + """CoordConv layer as in the paper.""" + def __init__(self, x_dim, y_dim, with_r, *args, **kwargs): + super(CoordConv, self).__init__() + self.addcoords = AddCoords(x_dim=x_dim, + y_dim=y_dim, + with_r=with_r) + self.conv = tf.layers.Conv2D(*args, **kwargs) + + def call(self, input_tensor, **kwargs): + ret = self.addcoords(input_tensor) + ret = self.conv(ret) + return ret + + +def coord_conv(x_dim, y_dim, with_r, inputs, *args, **kwargs): + layer = CoordConv(x_dim, y_dim, with_r, *args, **kwargs) + return layer.apply(inputs) + diff --git a/data/config/reward_config.yml b/data/config/reward_config.yml index 4406e29..5dc05fe 100644 --- a/data/config/reward_config.yml +++ b/data/config/reward_config.yml @@ -1,6 +1,4 @@ general: - epochs: 5000 - save_every_epochs: 1 # gpu_usage: 0.1 gpu_usage: 0.8 # with vision # scenario: 'no_obstacles' @@ -8,7 +6,7 @@ general: # scenario: 'hard' # scenario: 'vision' scenario: 'vision_harder' - number_of_unzippers: 10 + number_of_unzippers: 1 openrave_rl: action_step_size: 0.025 @@ -21,24 +19,31 @@ openrave_rl: truncate_penalty: 0.05 model: -# batch_size: 10240 - batch_size: 2000 # with vision potential_points: [2, 0., 0.075, 3, 0., 0.085, 4, -0.02, 0.05, 4, 0.005, 0.05, 5, 0.005, 0.035, 5, -0.02, 0.035] consider_goal_pose: True + goal_configuration_distance_sensitivity: 1.23 -reward: +train: + epochs: 20 + save_every_epochs: 1 + # batch_size: 10240 + batch_size: 300 # with vision + test_every_batches: 1000 initial_learn_rate: 0.001 -# initial_learn_rate: 0.01 - decrease_learn_rate_after: 2000 -# decrease_learn_rate_after: 10000 -# learn_rate_decrease_rate: 0.8 - learn_rate_decrease_rate: 1.0 + decrease_learn_rate_after: 10000 + learn_rate_decrease_rate: 0.5 gradient_limit: 5.0 -# gradient_limit: 0.0 - layers: [100, 100, 100, 100] -# layers: [200, 200, 200, 200, 200] l2_regularization_coefficient: 0.0001 
cross_entropy_coefficient: 1.0 + oversample_goal: 0.2 + oversample_collision: 0.38 + +network: + layers: [300, 300, 300, 200, 100, 100] activation: 'elu' - oversample_goal: 1.0 - oversample_collision: 1.0 + use_coordnet: true + image_network: 'resnet' + train_vae: true + vae_latent_dim: 150 + resnet_num_of_residual_blocks: 6 + diff --git a/dqn_model.py b/dqn_model.py index 0bb542f..1431147 100644 --- a/dqn_model.py +++ b/dqn_model.py @@ -1,20 +1,28 @@ import tensorflow as tf +from coordnet_model import coord_conv class DqnModel: - def __init__(self, prefix): + def __init__(self, prefix, config): self.prefix = '{}_dqn'.format(prefix) + self.config = config + self.use_coordnet = self.config['network']['use_coordnet'] def predict(self, workspace_image, reuse_flag): - conv1 = tf.layers.conv2d(workspace_image, 32, 8, 4, padding='same', activation=tf.nn.relu, use_bias=True, - name='{}_conv1'.format(self.prefix), reuse=reuse_flag) - conv2 = tf.layers.conv2d(conv1, 64, 4, 2, padding='same', activation=tf.nn.relu, use_bias=True, + if self.use_coordnet: + workspace_image = coord_conv(55, 111, False, workspace_image, 32, 8, 4, padding='same', + activation=tf.nn.relu, use_bias=True, name='{}_conv1'.format(self.prefix), + _reuse=reuse_flag) + + conv2 = tf.layers.conv2d(workspace_image, 64, 4, 2, padding='same', activation=tf.nn.relu, use_bias=True, name='{}_conv2'.format(self.prefix), reuse=reuse_flag) - # conv3 = tf.layers.conv2d(conv2, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True) - # flat = tf.layers.flatten(conv3) - flat = tf.layers.flatten(conv2, name='{}_flat'.format(self.prefix)) + conv3 = tf.layers.conv2d(conv2, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True) + + flat = tf.layers.flatten(conv3, name='{}_flat'.format(self.prefix)) dense1 = tf.layers.dense(flat, 512, activation=tf.nn.relu, name='{}_dense1'.format(self.prefix), reuse=reuse_flag) - dense2 = tf.layers.dense(dense1, 512, activation=None, name='{}_dense2'.format(self.prefix), reuse=reuse_flag) - return dense2 - + dense2 = tf.layers.dense(dense1, 512, activation=tf.nn.relu, name='{}_dense2'.format(self.prefix), + reuse=reuse_flag) + dense3 = tf.layers.dense(dense2, 512, activation=tf.nn.relu, name='{}_dense3'.format(self.prefix), + reuse=reuse_flag) + return dense3 diff --git a/generate_reward_workspaces.py b/generate_reward_workspaces.py new file mode 100644 index 0000000..05788bb --- /dev/null +++ b/generate_reward_workspaces.py @@ -0,0 +1,25 @@ +from workspace_generation_utils import * +from image_cache import ImageCache +import os + +TOTAL_WORKSPACES = 10000 +OUTPUT_DIR = "scenario_params/vision_harder" + + +if not os.path.isdir(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) + +generator = WorkspaceGenerator(obstacle_count_probabilities={2: 0.05, 3: 0.5, 4: 0.4, 5: 0.05}) +for i in range(TOTAL_WORKSPACES): + save_path = os.path.join(OUTPUT_DIR, '{}_workspace.pkl'.format(i)) + + if os.path.exists(save_path): + print("workspace %d already exists" % i) + continue + + print("generateing workspace %d" % i) + workspace_params = generator.generate_workspace() + workspace_params.save(save_path) + +print("Creating Image Cache") +ImageCache(OUTPUT_DIR, True) diff --git a/image_vae_model.py b/image_vae_model.py new file mode 100644 index 0000000..3f1aef5 --- /dev/null +++ b/image_vae_model.py @@ -0,0 +1,194 @@ +from reward_data_manager import get_image_cache +import time +import datetime +import numpy as np +import os +import yaml +import tensorflow as tf +from vae_network import VAENetwork + + +class VAEModel: + + 
def __init__(self, model_name, config, models_base_dir, tensorboard_dir): + + self.model_name = model_name + self.config = config + + self.model_dir = os.path.join(models_base_dir, self.model_name) + if not os.path.exists(self.model_dir): + os.makedirs(self.model_dir) + + self.train_summaries = [] + self.test_summaries = [] + + self.epochs = config['general']['epochs'] + self.save_every_epochs = config['general']['save_every_epochs'] + self.train_vae = config['reward']['train_vae'] + + inputs_example = tf.placeholder(tf.float32, (None, 55, 111), name='example') + self.network = VAENetwork(config, self.model_dir, inputs_example.shape) + + self.global_step = 0 + self.global_step_var = tf.Variable(0, trainable=False) + + self.loss = self.init_loss() + self.optimizer = self.init_optimizer() + + with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd: + yaml.dump(config, fd) + + self.train_board = self.TensorBoard(tensorboard_dir, 'train_' + model_name, self.train_summaries) + self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries) + + def load(self, session): + self.network.load_weights(session) + + def make_feed(self, data_batch): + return self.network.make_feed(*data_batch) + + def predict(self, data_batch, session): + feed = self.make_feed(data_batch) + return session.run([self.prediction], feed)[0] + + def init_loss(self): + status_loss_scale = self.config['reward']['cross_entropy_coefficient'] + img_loss, latent_loss, total_loss = self.network.get_loss() + + image_loss_summary = tf.summary.scalar('Image_Loss', img_loss) + latent_loss_summary = tf.summary.scalar('Latent_Loss', latent_loss) + + regularization_loss = tf.losses.get_regularization_loss() + regularization_loss_summary = tf.summary.scalar('Regularization_Loss', regularization_loss) + + # total_loss = total_loss + regularization_loss + total_loss_summary = tf.summary.scalar('Total_Loss', total_loss) + + self.train_summaries += [image_loss_summary, latent_loss_summary, regularization_loss_summary, total_loss_summary] + self.test_summaries += [image_loss_summary, latent_loss_summary, regularization_loss_summary, total_loss_summary] + + return total_loss + + def init_optimizer(self): + initial_learn_rate = self.config['reward']['initial_learn_rate'] + decrease_learn_rate_after = self.config['reward']['decrease_learn_rate_after'] + learn_rate_decrease_rate = self.config['reward']['learn_rate_decrease_rate'] + + learning_rate = tf.train.exponential_decay(initial_learn_rate, + self.global_step_var, + decrease_learn_rate_after, + learn_rate_decrease_rate, + staircase=True) + self.train_summaries.append(tf.summary.scalar('Learn_Rate', learning_rate)) + + optimizer = tf.train.AdamOptimizer(learning_rate) + + gradients, variables = zip(*optimizer.compute_gradients(self.loss, tf.trainable_variables())) + initial_gradients_norm = tf.global_norm(gradients) + gradient_limit = self.config['reward']['gradient_limit'] + if gradient_limit > 0.0: + gradients, _ = tf.clip_by_global_norm(gradients, gradient_limit, use_norm=initial_gradients_norm) + clipped_gradients_norm = tf.global_norm(gradients) + initial_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Initial', initial_gradients_norm) + clipped_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Clipped', clipped_gradients_norm) + self.train_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary] + self.test_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary] + + return 
optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step_var) + + def _train_batch(self, train_batch, session): + train_feed = {self.network.workspace_image_inputs: train_batch} + train_summary, self.global_step, img_loss, _ = session.run( + [self.train_board.summaries, self.global_step_var, self.network.encoded, self.optimizer], + train_feed) + # print(img_loss) + self.train_board.writer.add_summary(train_summary, self.global_step) + + def _test_batch(self, test_batch, session): + test_feed = {self.network.workspace_image_inputs: test_batch} + test_summary = session.run( + [self.test_board.summaries], + test_feed)[0] + self.test_board.writer.add_summary(test_summary, self.global_step) + self.test_board.writer.flush() + + def train(self, train_data, test_data, session): + session.run(tf.global_variables_initializer()) + session.run(tf.local_variables_initializer()) + + test_every_batches = self.config['reward']['test_every_batches'] + + total_train_batches = 0 + for epoch in range(self.epochs): + + train_batch_count = 1 + for train_batch in train_data: + self._train_batch(train_batch, session) + print("Finished epoch %d/%d batch %d/%d" % (epoch+1, self.epochs, train_batch_count, total_train_batches)) + train_batch_count += 1 + + if train_batch_count % test_every_batches == 0: + test_batch = next(test_data.__iter__()) # random test batch + self._test_batch(test_batch, session) + # save the model + # self.network.save_weights(session, self.global_step) + + total_train_batches = train_batch_count - 1 + self.train_board.writer.flush() + + test_batch = next(test_data.__iter__()) # random test batch + self._test_batch(test_batch, session) + + # save the model + # if epoch == self.epochs - 1 or epoch % self.save_every_epochs == self.save_every_epochs - 1: + # self.network.save_weights(session, self.global_step) + + print('done epoch {} of {}, global step {}'.format(epoch, self.epochs, self.global_step)) + + class TensorBoard: + + def __init__(self, tensorboard_path, board_name, summaries): + self.writer = tf.summary.FileWriter(os.path.join(tensorboard_path, board_name)) + self.summaries = tf.summary.merge(summaries) + + +def count_weights(): + total_parameters = 0 + for variable in tf.trainable_variables(): + # shape is an array of tf.Dimension + shape = variable.get_shape() + variable_parameters = 1 + for dim in shape: + variable_parameters *= dim.value + total_parameters += variable_parameters + print(total_parameters) + +if __name__ == '__main__': + # read the config + config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml') + with open(config_path, 'r') as yml_file: + config = yaml.load(yml_file) + print('------------ Config ------------') + print(yaml.dump(config)) + + model_name = "vae" + datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') + + image_cache = get_image_cache(config) + batch_size = 1 + images_data = [image.np_array for image in image_cache.items.values()] + images_batch_data = [images_data[i:i+batch_size] for i in range(0, len(images_data), batch_size)] + + train_data_count = int(len(images_batch_data) * 0.8) + train_data = images_batch_data[:train_data_count] + test_data = images_batch_data[train_data_count:] + + models_base_dir = os.path.join('data', 'reward', 'model') + vae_model = VAEModel(model_name, config, models_base_dir, tensorboard_dir=models_base_dir) + + + + gpu_usage = config['general']['gpu_usage'] + session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_usage)) 
+ with tf.Session(config=session_config) as session: + count_weights() + vae_model.train(train_data, test_data, session) diff --git a/pre_trained_reward.py b/pre_trained_reward.py deleted file mode 100644 index 37725c7..0000000 --- a/pre_trained_reward.py +++ /dev/null @@ -1,244 +0,0 @@ -import bz2 -import numpy as np -import random -import pickle -import os -import tensorflow as tf -import tensorflow.contrib.layers as tf_layers - -from dqn_model import DqnModel -from modeling_utils import get_activation - - -class PreTrainedReward: - - def __init__(self, model_name, config): - self._reuse_flag = False - - self.config = config - self.is_vision_enabled = 'vision' in config['general']['scenario'] - - self.joints_inputs = tf.placeholder(tf.float32, (None, 4), name='joints_inputs') - self.goal_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='goal_joints_inputs') - self.workspace_image_inputs, self.images_3d = None, None - if self.is_vision_enabled: - self.workspace_image_inputs = tf.placeholder(tf.float32, (None, 55, 111), name='workspace_image_inputs') - self.images_3d = tf.expand_dims(self.workspace_image_inputs, axis=-1) - self.goal_pose_inputs = tf.placeholder(tf.float32, (None, 2), name='goal_pose_inputs') - self.action_inputs = tf.placeholder(tf.float32, (None, 4), name='action_inputs') - self.transition_label = tf.placeholder_with_default([[0.0]*3], (None, 3), name='labeled_transition') - current_variables_count = len(tf.trainable_variables()) - self.reward_prediction, self.status_softmax_logits = self.create_reward_network( - self.joints_inputs, self.action_inputs, self.goal_joints_inputs, self.goal_pose_inputs, self.images_3d - ) - reward_variables = tf.trainable_variables()[current_variables_count:] - - # model path to load - self.model_name = model_name - self.saver_dir = os.path.join(os.getcwd(), 'data', 'reward', 'model', model_name) - assert os.path.exists(self.saver_dir) - self.saver = tf.train.Saver(reward_variables, max_to_keep=4, save_relative_paths=self.saver_dir) - - @staticmethod - def _generate_goal_features(goal_joints_inputs, goal_pose_inputs): - features = [goal_joints_inputs] - if goal_pose_inputs is not None: - features.append(goal_pose_inputs) - return tf.concat(features, axis=1) - - def _next_state_model(self, joints_inputs, action_inputs): - # next step is a deterministic computation - action_step_size = self.config['openrave_rl']['action_step_size'] - step = action_inputs * action_step_size - unclipped_result = joints_inputs + step - # we initiate an openrave manager to get the robot, to get the joint bounds and the safety - joint_safety = 0.0001 - lower_bounds = [-2.617, -1.571, -1.571, -1.745, -2.617] - lower_bounds = [b + joint_safety for b in lower_bounds[1:]] - upper_bounds = [-b for b in lower_bounds] - - # clip the result - clipped_result = tf.maximum(unclipped_result, lower_bounds) - clipped_result = tf.minimum(clipped_result, upper_bounds) - return clipped_result, unclipped_result - - def create_reward_network( - self, joints_inputs, action_inputs, goal_joints_inputs, goal_pose_inputs, images_3d): - name_prefix = 'reward' - # get the next joints - clipped_next_joints, unclipped_next_joints = self._next_state_model(joints_inputs, action_inputs) - - # predict the transition classification - layers = self.config['reward']['layers'] + [3] - scale = 0.0 - if 'l2_regularization_coefficient' in self.config['reward']: - scale = self.config['reward']['l2_regularization_coefficient'] - current = tf.concat( - (clipped_next_joints, 
self._generate_goal_features(goal_joints_inputs, goal_pose_inputs)), axis=1) - # add vision if needed - if self.is_vision_enabled: - visual_inputs = DqnModel(name_prefix).predict(images_3d, self._reuse_flag) - current = tf.concat((current, visual_inputs), axis=1) - for i, layer_size in enumerate(layers): - _activation = None if i == len(layers) - 1 else get_activation(self.config['reward']['activation']) - current = tf.layers.dense( - current, layer_size, activation=_activation, name='{}_layers_{}'.format(name_prefix, i), - kernel_regularizer=tf_layers.l2_regularizer(scale), reuse=self._reuse_flag - ) - softmax_logits = current - softmax_res = tf.nn.softmax(softmax_logits) - - # if the one-hot input is fed, is labeled will be 1.0 otherwise it will be zero - is_labeled = tf.expand_dims(tf.reduce_max(self.transition_label, axis=1), axis=1) - reward_calculation_input = self.transition_label + tf.multiply(1.0 - is_labeled, softmax_res) - - # get the classification reward - classification_reward = tf.layers.dense( - reward_calculation_input, 1, activation=None, use_bias=False, - name='{}_classification_reward'.format(name_prefix), reuse=self._reuse_flag - ) - - # get the clipping-related reward - # clipped_difference = tf.expand_dims(tf.norm(unclipped_next_joints - clipped_next_joints, axis=1), axis=1) # this is the original - # clipped_difference = tf.expand_dims(tf.reduce_sum(tf.zeros_like(clipped_next_joints), axis=1), axis=1) # this will have no gradient backlash - clipped_difference = tf.expand_dims(tf.reduce_sum(tf.abs(unclipped_next_joints - clipped_next_joints), axis=1), axis=1) - - clipping_reward = tf.layers.dense( - clipped_difference, 1, activation=None, use_bias=False, name='{}_clipping_weight'.format(name_prefix), - reuse=self._reuse_flag - ) - - total_reward = classification_reward + clipping_reward - self._reuse_flag = True - return total_reward, softmax_logits - - def load_weights(self, sess): - self.saver.restore(sess, tf.train.latest_checkpoint(self.saver_dir)) - - def make_prediction(self, sess, all_start_joints, all_goal_joints, all_actions, all_goal_poses, - all_transition_labels=None, images=None): - feed = self.make_feed(all_start_joints, all_goal_joints, all_actions, all_goal_poses, images=images, - all_transition_labels=all_transition_labels) - return sess.run([self.reward_prediction, self.status_softmax_logits], feed) - - def make_feed(self, all_start_joints, all_goal_joints, all_actions, all_goal_poses, images=None, - all_transition_labels=None): - feed = { - self.joints_inputs: all_start_joints, - self.goal_joints_inputs: all_goal_joints, - self.action_inputs: all_actions, - } - if self.goal_pose_inputs is not None: - feed[self.goal_pose_inputs] = all_goal_poses - if self.is_vision_enabled: - assert images is not None - assert images[0] is not None - feed[self.workspace_image_inputs] = images - if all_transition_labels is not None: - feed[self.transition_label] = all_transition_labels - return feed - - -def oversample_batch(current_batch, oversample_large_magnitude=None): - if oversample_large_magnitude is None: - return current_batch - oversample_success = oversample_large_magnitude[0] - oversample_collision = oversample_large_magnitude[1] - status = [b[-1] for b in current_batch] - success_reward_indices = [i for i, s in enumerate(status) if s == 3] - if len(success_reward_indices) < 3: - return None - collision_reward_indices = [i for i, s in enumerate(status) if s == 2] - if len(collision_reward_indices) < 3: - return None - other_reward_indices = [i for i, s in 
enumerate(status) if s == 1] - assert len(success_reward_indices) + len(collision_reward_indices) + len(other_reward_indices) == len(current_batch) - sample_size = len(other_reward_indices) - batch_indices = other_reward_indices - # sample_size = min(100, len(other_reward_indices)) - # batch_indices = list(np.random.choice(other_reward_indices, sample_size)) - success_sample_size = int(oversample_success * sample_size) - success_super_sample = list(np.random.choice(success_reward_indices, success_sample_size)) - batch_indices.extend(success_super_sample) - collision_sample_size = int(oversample_collision * sample_size) - collision_super_sample = list(np.random.choice(collision_reward_indices, collision_sample_size)) - batch_indices.extend(collision_super_sample) - return [current_batch[i] for i in batch_indices] - - -def get_batch_and_labels(batch, openrave_manager, image_cache): - all_start_joints = [] - all_goal_joints = [] - all_actions = [] - all_rewards = [] - all_goal_poses = [] - all_status = [] - all_images = None - if image_cache is not None: - all_images = [] - for i in range(len(batch)): - if image_cache is None: - workspace_id = None - start_joints, goal_joints, action, next_joints, reward, terminated, status = batch[i] - else: - workspace_id, start_joints, goal_joints, action, next_joints, reward, terminated, status = batch[i] - goal_pose = openrave_manager.get_target_pose(goal_joints) - all_start_joints.append(start_joints[1:]) - all_goal_joints.append(goal_joints[1:]) - all_actions.append(action[1:]) - all_rewards.append(reward) - all_status.append(status) - all_goal_poses.append(goal_pose) - if image_cache is not None: - image = image_cache.items[workspace_id].np_array - all_images.append(image) - return [all_start_joints, all_goal_joints, all_actions, all_goal_poses, all_images], all_rewards, all_status - - -def compute_stats_single_class(real_status, real_reward, status_prediction, reward_prediction, class_indicator): - class_indices = [i for i, s in enumerate(real_status) if s == class_indicator] - if len(class_indices) == 0: - accuracy = 0.0 - class_average_absolute_error = 0.0 - class_max_absolute_error = 0.0 - else: - my_status_prediction = [status_prediction[i] for i in class_indices] - best_label = np.argmax(my_status_prediction, axis=1) - best_label += 1 - hit_cont = len([i for i, b in enumerate(best_label) if b == class_indicator]) - accuracy = float(hit_cont) / len(class_indices) - difference = [np.abs(real_reward[i] - reward_prediction[i]) for i in class_indices] - class_average_absolute_error = np.mean(difference) - class_max_absolute_error = np.max(difference) - return class_indices, [class_average_absolute_error, class_max_absolute_error, accuracy] - - -def compute_stats_per_class(real_status, real_reward, status_prediction, reward_prediction): - goal_rewards_indices, goal_stats = compute_stats_single_class( - real_status, real_reward, status_prediction, reward_prediction, 3) - collision_rewards_indices, collision_stats = compute_stats_single_class( - real_status, real_reward, status_prediction, reward_prediction, 2) - other_rewards_indices, other_stats = compute_stats_single_class( - real_status, real_reward, status_prediction, reward_prediction, 1) - assert len(goal_rewards_indices) + len(collision_rewards_indices) + len(other_rewards_indices) == len(real_reward) - return goal_stats, collision_stats, other_stats - - -def load_data_from(data_dir, max_read=None, is_vision=False): - assert os.path.exists(data_dir) - files = [file for file in 
os.listdir(data_dir) if file.endswith(".pkl")] - assert len(files) > 0 - random.shuffle(files) - total_buffer = [] - for file in files: - if max_read is not None and len(total_buffer) > max_read: - break - compressed_file = bz2.BZ2File(os.path.join(data_dir, file), 'r') - current_buffer = pickle.load(compressed_file) - compressed_file.close() - if is_vision: - parts = file.split('_') - workspace_id = '{}_{}.pkl'.format(parts[0], parts[1]) - current_buffer = [tuple([workspace_id] + list(t)) for t in current_buffer] - total_buffer.extend(current_buffer) - return total_buffer diff --git a/resnet_model.py b/resnet_model.py new file mode 100644 index 0000000..a592b67 --- /dev/null +++ b/resnet_model.py @@ -0,0 +1,241 @@ +# Most of the code in this model was taken from: https://github.com/wenxinxu/resnet-in-tensorflow/blob/master/resnet.py + +import tensorflow as tf +from coordnet_model import coord_conv +import numpy as np +import yaml +import os +import datetime +import time + +BN_EPSILON = 0.001 + + +class ResNetModel: + def __init__(self, prefix, config): + self.prefix = '{}_resnet'.format(prefix) + self.config = config + self.use_coordnet = self.config['network']['use_coordnet'] + self.l2_regularization_coefficient = self.config['train']['l2_regularization_coefficient'] + + def activation_summary(self, x): + """ + :param x: A Tensor + :return: Add histogram summary and scalar summary of the sparsity of the tensor + """ + tensor_name = x.op.name + tf.summary.histogram(tensor_name + '/activations', x) + tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x)) + + def create_variables(self, name, shape, initializer=tf.contrib.layers.xavier_initializer(), is_fc_layer=False): + """ + :param name: A string. The name of the new variable + :param shape: A list of dimensions + :param initializer: User Xavier as default. + :param is_fc_layer: Want to create fc layer variable? May use different weight_decay for fc + layers. + :return: The created variable + """ + + ## TODO: to allow different weight decay to fully connected layer and conv layer + regularizer = tf.contrib.layers.l2_regularizer(scale=self.l2_regularization_coefficient) + + new_variables = tf.get_variable(name, shape=shape, initializer=initializer, + regularizer=regularizer) + return new_variables + + def output_layer(self, input_layer, num_labels): + """ + :param input_layer: 2D tensor + :param num_labels: int. How many output labels in total? (10 for cifar10 and 100 for cifar100) + :return: output layer Y = WX + B + """ + input_dim = input_layer.get_shape().as_list()[-1] + fc_w = self.create_variables(name='fc_weights', shape=[input_dim, num_labels], is_fc_layer=True, + initializer=tf.uniform_unit_scaling_initializer(factor=1.0)) + fc_b = self.create_variables(name='fc_bias', shape=[num_labels], initializer=tf.zeros_initializer()) + + fc_h = tf.matmul(input_layer, fc_w) + fc_b + return fc_h + + def batch_normalization_layer(self, input_layer, dimension): + """ + Helper function to do batch normalziation + :param input_layer: 4D tensor + :param dimension: input_layer.get_shape().as_list()[-1]. 
The depth of the 4D tensor + :return: the 4D tensor after being normalized + """ + mean, variance = tf.nn.moments(input_layer, axes=[0, 1, 2]) + beta = tf.get_variable('beta', dimension, tf.float32, + initializer=tf.constant_initializer(0.0, tf.float32)) + gamma = tf.get_variable('gamma', dimension, tf.float32, + initializer=tf.constant_initializer(1.0, tf.float32)) + bn_layer = tf.nn.batch_normalization(input_layer, mean, variance, beta, gamma, BN_EPSILON) + + return bn_layer + + def conv_bn_relu_layer(self, input_layer, filter_shape, stride): + """ + A helper function to conv, batch normalize and relu the input tensor sequentially + :param input_layer: 4D tensor + :param filter_shape: list. [filter_height, filter_width, filter_depth, filter_number] + :param stride: stride size for conv + :return: 4D tensor. Y = Relu(batch_normalize(conv(X))) + """ + + out_channel = filter_shape[-1] + + if self.use_coordnet: + conv_layer = coord_conv(55, 111, False, input_layer, filter_shape[-1], filter_shape[0:2], stride, + padding='same', use_bias=True, name='{}_conv1'.format(self.prefix)) + else: + filter = self.create_variables(name='conv', shape=filter_shape) + conv_layer = tf.nn.conv2d(input_layer, filter, strides=[1, stride, stride, 1], padding='SAME') + bn_layer = self.batch_normalization_layer(conv_layer, out_channel) + + output = tf.nn.relu(bn_layer) + return output + + def bn_relu_conv_layer(self, input_layer, filter_shape, stride): + """ + A helper function to batch normalize, relu and conv the input layer sequentially + :param input_layer: 4D tensor + :param filter_shape: list. [filter_height, filter_width, filter_depth, filter_number] + :param stride: stride size for conv + :return: 4D tensor. Y = conv(Relu(batch_normalize(X))) + """ + + in_channel = input_layer.get_shape().as_list()[-1] + + bn_layer = self.batch_normalization_layer(input_layer, in_channel) + relu_layer = tf.nn.relu(bn_layer) + + filter = self.create_variables(name='conv', shape=filter_shape) + conv_layer = tf.nn.conv2d(relu_layer, filter, strides=[1, stride, stride, 1], padding='SAME') + return conv_layer + + def residual_block(self, input_layer, output_channel, first_block=False): + """ + Defines a residual block in ResNet + :param input_layer: 4D tensor + :param output_channel: int. return_tensor.get_shape().as_list()[-1] = output_channel + :param first_block: if this is the first residual block of the whole network + :return: 4D tensor. + """ + input_channel = input_layer.get_shape().as_list()[-1] + + # When it's time to "shrink" the image size, we use stride = 2 + if input_channel * 2 == output_channel: + increase_dim = True + stride = 2 + elif input_channel == output_channel: + increase_dim = False + stride = 1 + else: + raise ValueError('Output and input channel does not match in residual blocks!!!') + + # The first conv layer of the first residual block does not need to be normalized and relu-ed. 
+ with tf.variable_scope('conv1_in_block'): + if first_block: + filter = self.create_variables(name='conv', shape=[3, 3, input_channel, output_channel]) + conv1 = tf.nn.conv2d(input_layer, filter=filter, strides=[1, 1, 1, 1], padding='SAME') + else: + conv1 = self.bn_relu_conv_layer(input_layer, [3, 3, input_channel, output_channel], stride) + + with tf.variable_scope('conv2_in_block'): + conv2 = self.bn_relu_conv_layer(conv1, [3, 3, output_channel, output_channel], 1) + + # When the channels of input layer and conv2 does not match, we add zero pads to increase the + # depth of input layers + if increase_dim is True: + pooled_input = tf.nn.avg_pool(input_layer, ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], padding='VALID') + padding_w = input_layer.shape[1].value % 2 + padding_h = input_layer.shape[2].value % 2 + padded_input = tf.pad(pooled_input, [[0, 0], [padding_w, 0], [padding_h, 0], [input_channel // 2, + input_channel // 2]]) + else: + padded_input = input_layer + + output = conv2 + padded_input + return output + + def predict(self, input_tensor_batch, n, reuse): + """ + The main function that defines the ResNet. total layers = 1 + 2n + 2n + 2n +1 = 6n + 2 + :param input_tensor_batch: 4D tensor + :param n: num_residual_blocks + :param reuse: To build train graph, reuse=False. To build validation graph and share weights + with train graph, resue=True + :return: last layer in the network. Not softmax-ed + """ + + layers = [] + with tf.variable_scope('conv0', reuse=reuse): + if self.use_coordnet: + first_filter_shape = [3, 3, 3, 16] + else: + first_filter_shape = [3, 3, 1, 16] + conv0 = self.conv_bn_relu_layer(input_tensor_batch, first_filter_shape, 1) + self.activation_summary(conv0) + layers.append(conv0) + + for i in range(n): + with tf.variable_scope('conv1_%d' % i, reuse=reuse): + if i == 0: + conv1 = self.residual_block(layers[-1], 16, first_block=True) + else: + conv1 = self.residual_block(layers[-1], 16) + self.activation_summary(conv1) + layers.append(conv1) + + for i in range(n): + with tf.variable_scope('conv2_%d' % i, reuse=reuse): + conv2 = self.residual_block(layers[-1], 32) + self.activation_summary(conv2) + layers.append(conv2) + + for i in range(n): + with tf.variable_scope('conv3_%d' % i, reuse=reuse): + conv3 = self.residual_block(layers[-1], 64) + layers.append(conv3) + # assert conv3.get_shape().as_list()[1:] == [8, 8, 64] + + with tf.variable_scope('fc', reuse=reuse): + in_channel = layers[-1].get_shape().as_list()[-1] + bn_layer = self.batch_normalization_layer(layers[-1], in_channel) + relu_layer = tf.nn.relu(bn_layer) + global_pool = tf.reduce_mean(relu_layer, [1, 2]) + + assert global_pool.get_shape().as_list()[-1:] == [64] + output = self.output_layer(global_pool, 10) + layers.append(output) + + return layers[-1] + + def test_graph(self, train_dir='logs'): + """ + Run this function to look at the graph structure on tensorboard. A fast way! 
+ :param train_dir: + """ + if self.use_coordnet: + input_tensor = tf.constant(np.ones([111, 55, 32, 3]), dtype=tf.float32) + else: + input_tensor = tf.constant(np.ones([128, 32, 32, 1]), dtype=tf.float32) + result = self.predict(input_tensor, 2, reuse=False) + init = tf.initialize_all_variables() + sess = tf.Session() + sess.run(init) + summary_writer = tf.summary.FileWriter(train_dir, graph=sess.graph) + + +if __name__ == '__main__': + # read the config + config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml') + with open(config_path, 'r') as yml_file: + config = yaml.load(yml_file) + print('------------ Config ------------') + print(yaml.dump(config)) + model_name = "renset_" + datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') + resnet = ResNetModel(model_name, config) + resnet.test_graph() diff --git a/reward_collision_model.py b/reward_collision_model.py new file mode 100644 index 0000000..ebb8093 --- /dev/null +++ b/reward_collision_model.py @@ -0,0 +1,206 @@ +from reward_collision_network import CollisionNetwork +import reward_data_manager +from reward_data_manager import get_train_and_test_datasets, get_batch_and_labels, get_image_cache +import time +import datetime +import numpy as np +import os +import yaml +import tensorflow as tf + + +class CollisionModel: + + def __init__(self, model_name, config, models_base_dir, tensorboard_dir): + + self.model_name = model_name + self.config = config + + self.model_dir = os.path.join(models_base_dir, self.model_name) + if not os.path.exists(self.model_dir): + os.makedirs(self.model_dir) + + self.train_summaries = [] + self.test_summaries = [] + + self.epochs = config['train']['epochs'] + self.save_every_epochs = config['train']['save_every_epochs'] + + self.network = CollisionNetwork(config, self.model_dir) + self.net_output = self.network.status_softmax_logits + self.status_input = tf.placeholder(tf.int32, [None, ]) + # TODO: fix collision_prob + self.collision_prob = tf.expand_dims(tf.nn.softmax(self.net_output)[:, 1], -1) + self.prediction = tf.argmax(tf.nn.softmax(self.net_output), axis=1, output_type=tf.int32) + + self.global_step = 0 + self.global_step_var = tf.Variable(0, trainable=False) + + self.loss = self.init_loss() + self.test_measures = self.add_test_measures() + self.optimizer = self.init_optimizer() + + with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd: + yaml.dump(config, fd) + + self.train_board = self.TensorBoard(tensorboard_dir, 'train_' + model_name, self.train_summaries) + self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries) + + def load(self, session): + self.network.load_weights(session) + + def make_feed(self, data_batch): + return self.network.make_feed(*data_batch) + + def predict(self, data_batch, session): + feed = self.make_feed(data_batch) + return session.run([self.prediction], feed)[0] + + def init_loss(self): + status_loss_scale = self.config['train']['cross_entropy_coefficient'] + status_loss = status_loss_scale * \ + tf.losses.sparse_softmax_cross_entropy(labels=self.status_input - 1, logits=self.net_output) + status_loss_summary = tf.summary.scalar('Status_Loss', status_loss) + + regularization_loss = tf.losses.get_regularization_loss() + regularization_loss_summary = tf.summary.scalar('Regularization_Loss', regularization_loss) + + total_loss = status_loss + regularization_loss + total_loss_summary = tf.summary.scalar('Total_Loss', total_loss) + + self.train_summaries += [status_loss_summary, 
regularization_loss_summary, total_loss_summary] + self.test_summaries += [status_loss_summary, regularization_loss_summary, total_loss_summary] + + return total_loss + + def add_test_measures(self): + labels = self.status_input - 1 + accuracy = tf.reduce_mean(tf.cast(tf.equal(labels, self.prediction), tf.float32)) + TP = tf.count_nonzero((self.prediction) * (labels), dtype=tf.float32) + tf.Variable(0.001) + TN = tf.count_nonzero((self.prediction - 1) * (labels - 1), dtype=tf.float32) + tf.Variable(0.001) + FP = tf.count_nonzero(self.prediction * (labels - 1), dtype=tf.float32) + tf.Variable(0.001) + FN = tf.count_nonzero((self.prediction - 1) * labels, dtype=tf.float32) + tf.Variable(0.001) + precision = TP / (TP + FP) + recall = TP / (TP + FN) + accuracy_summary = tf.summary.scalar('Accuracy', accuracy) + recall_summary = tf.summary.scalar('Recall', recall) + precision_summary = tf.summary.scalar('Precision', precision) + self.test_summaries += [accuracy_summary, recall_summary, precision_summary] + self.train_summaries += [accuracy_summary, recall_summary, precision_summary] + return [accuracy, recall, precision] + + def init_optimizer(self): + initial_learn_rate = self.config['train']['initial_learn_rate'] + decrease_learn_rate_after = self.config['train']['decrease_learn_rate_after'] + learn_rate_decrease_rate = self.config['train']['learn_rate_decrease_rate'] + + learning_rate = tf.train.exponential_decay(initial_learn_rate, + self.global_step_var, + decrease_learn_rate_after, + learn_rate_decrease_rate, + staircase=True) + self.train_summaries.append(tf.summary.scalar('Learn_Rate', learning_rate)) + + optimizer = tf.train.AdamOptimizer(learning_rate) + + gradients, variables = zip(*optimizer.compute_gradients(self.loss, tf.trainable_variables())) + initial_gradients_norm = tf.global_norm(gradients) + gradient_limit = self.config['train']['gradient_limit'] + if gradient_limit > 0.0: + gradients, _ = tf.clip_by_global_norm(gradients, gradient_limit, use_norm=initial_gradients_norm) + clipped_gradients_norm = tf.global_norm(gradients) + initial_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Initial', initial_gradients_norm) + clipped_gradients_norm_summary = tf.summary.scalar('Gradients_Norm_Clipped', clipped_gradients_norm) + self.train_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary] + self.test_summaries += [initial_gradients_norm_summary, clipped_gradients_norm_summary] + + return optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step_var) + + def _train_batch(self, train_batch, train_status_batch, session): + batch_start_joints, batch_actions, batch_images, _, _ = train_batch + train_feed = self.network.make_feed(batch_start_joints, batch_actions, batch_images) + train_feed[self.status_input] = np.array(train_status_batch) + train_summary, self.global_step, _, _ = session.run( + [self.train_board.summaries, self.global_step_var, self.optimizer, self.test_measures], + train_feed) + self.train_board.writer.add_summary(train_summary, self.global_step) + + def _test_batch(self, test_batch, test_status_batch, session): + batch_start_joints, batch_actions, batch_images, _, _ = test_batch + test_feed = self.network.make_feed(batch_start_joints, batch_actions, batch_images) + test_feed[self.status_input] = np.array(test_status_batch) + test_summary = session.run( + [self.test_board.summaries] + self.test_measures, + test_feed)[0] + self.test_board.writer.add_summary(test_summary, self.global_step) + 
self.test_board.writer.flush() + + def train(self, train_data, test_data, image_cache, session): + session.run(tf.global_variables_initializer()) + session.run(tf.local_variables_initializer()) + + test_every_batches = self.config['train']['test_every_batches'] + + total_train_batches = 0 + for epoch in range(self.epochs): + + train_batch_count = 1 + for train_batch in train_data: + + train_batch, train_status_batch = get_batch_and_labels(train_batch, image_cache) + # assert if train_status contains goal status + assert(np.all(np.array(train_status_batch) != reward_data_manager.GOAL_STATUS)) + + self._train_batch(train_batch, train_status_batch, session) + print("Finished epoch %d/%d batch %d/%d" % (epoch+1, self.epochs, train_batch_count, total_train_batches)) + train_batch_count += 1 + + if train_batch_count % test_every_batches == 0: + test_batch = next(test_data.__iter__()) # random test batch + test_batch, test_status_batch = get_batch_and_labels(test_batch, image_cache) + self._test_batch(test_batch, test_status_batch, session) + # save the model + self.network.save_weights(session, self.global_step) + + total_train_batches = train_batch_count - 1 + self.train_board.writer.flush() + + test_batch = next(test_data.__iter__()) # random test batch + test_batch, test_status_batch = get_batch_and_labels(test_batch, image_cache) + self._test_batch(test_batch, test_status_batch, session) + + # save the model + if epoch == self.epochs - 1 or epoch % self.save_every_epochs == self.save_every_epochs - 1: + self.network.save_weights(session, self.global_step) + + print('done epoch {} of {}, global step {}'.format(epoch, self.epochs, self.global_step)) + + class TensorBoard: + + def __init__(self, tensorboard_path, board_name, summaries): + self.writer = tf.summary.FileWriter(os.path.join(tensorboard_path, board_name)) + self.summaries = tf.summary.merge(summaries) + + +if __name__ == '__main__': + # read the config + config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml') + with open(config_path, 'r') as yml_file: + config = yaml.load(yml_file) + print('------------ Config ------------') + print(yaml.dump(config)) + + model_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') + collision_model_name = "collision_simple_trained" + models_base_dir = os.path.join('data', 'reward', 'model') + collision_model = CollisionModel(model_name, config, models_base_dir, tensorboard_dir=models_base_dir) + + train_data, test_data = get_train_and_test_datasets(config, is_collision_model=True) + image_cache = get_image_cache(config) + + gpu_usage = config['general']['gpu_usage'] + session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_usage)) + with tf.Session(config=session_config) as session: + collision_model.train(train_data, test_data, image_cache, session) + train_data.stop() + test_data.stop() diff --git a/reward_collision_network.py b/reward_collision_network.py new file mode 100644 index 0000000..d2b33e9 --- /dev/null +++ b/reward_collision_network.py @@ -0,0 +1,107 @@ +import os +import tensorflow as tf +import tensorflow.contrib.layers as tf_layers +from dqn_model import DqnModel +from modeling_utils import get_activation +from resnet_model import ResNetModel + + +class CollisionNetwork: + + def __init__(self, config, model_dir): + self._reuse_flag = False + + self.config = config + self.is_vision_enabled = 'vision' in config['general']['scenario'] + self.image_network = config['network']['image_network'] + 
assert('dqn' == self.image_network or 'resnet' == self.image_network) + + self.joints_inputs = tf.placeholder(tf.float32, (None, 4), name='joints_inputs') + self.action_inputs = tf.placeholder(tf.float32, (None, 4), name='action_inputs') + + self.workspace_image_inputs, self.images_3d = None, None + if self.is_vision_enabled: + self.workspace_image_inputs = tf.placeholder(tf.float32, (None, 55, 111), name='workspace_image_inputs') + self.images_3d = tf.expand_dims(self.workspace_image_inputs, axis=-1) + + current_variables_count = len(tf.trainable_variables()) + self.status_softmax_logits = self._create_network(self.joints_inputs, self.action_inputs, self.images_3d) + self.collision_variables = tf.trainable_variables()[current_variables_count:] + + # model path to load + self.model_dir = model_dir + assert os.path.exists(self.model_dir) + self.saver_path = os.path.join(self.model_dir, "model_saver") + self.saver = tf.train.Saver(self.collision_variables, max_to_keep=4, save_relative_paths=True) + + def _next_state_model(self, joints_inputs, action_inputs): + # next step is a deterministic computation + action_step_size = self.config['openrave_rl']['action_step_size'] + step = action_inputs * action_step_size + unclipped_result = joints_inputs + step + # we initiate an openrave manager to get the robot, to get the joint bounds and the safety + joint_safety = 0.0001 + lower_bounds = [-2.617, -1.571, -1.571, -1.745, -2.617] + lower_bounds = [b + joint_safety for b in lower_bounds[1:]] + upper_bounds = [-b for b in lower_bounds] + + # clip the result + clipped_result = tf.maximum(unclipped_result, lower_bounds) + clipped_result = tf.minimum(clipped_result, upper_bounds) + return clipped_result, unclipped_result + + def _create_network(self, joints_inputs, action_inputs, images_3d): + name_prefix = 'reward' + + # get L2 regularization scale + l2_scale = 0.0 + if 'l2_regularization_coefficient' in self.config['train']: + l2_scale = self.config['train']['l2_regularization_coefficient'] + + # get the next joints + clipped_next_joints, unclipped_next_joints = self._next_state_model(joints_inputs, action_inputs) + current = tf.concat((joints_inputs, clipped_next_joints), axis=1) + + # add vision if needed + if self.is_vision_enabled: + if self.image_network == 'dqn': + visual_inputs = DqnModel(name_prefix, self.config).predict(images_3d, self._reuse_flag) + else: + num_of_residual_blocks = self.config['network']['resnet_num_of_residual_blocks'] + visual_inputs = ResNetModel(name_prefix, self.config).predict(images_3d, + num_of_residual_blocks, + self._reuse_flag) + current = tf.concat((current, visual_inputs), axis=1) + + layers = self.config['network']['layers'] + [2] + for i, layer_size in enumerate(layers): + _activation = get_activation(self.config['network']['activation']) if i < len(layers) - 1 else None + current = tf.layers.dense( + current, + layer_size, + activation=_activation, + name='{}_layers_{}'.format(name_prefix, i), + kernel_regularizer=tf_layers.l2_regularizer(l2_scale), + reuse=self._reuse_flag + ) + softmax_logits = current + + self._reuse_flag = True + return softmax_logits + + def save_weights(self, sess, global_step=None): + self.saver.save(sess, self.saver_path, global_step=global_step) + + def load_weights(self, sess): + self.saver.restore(sess, tf.train.latest_checkpoint(self.model_dir)) + + def make_feed(self, all_start_joints, all_actions, images=None): + feed = { + self.joints_inputs: all_start_joints, + self.action_inputs: all_actions, + } + if self.is_vision_enabled: 
+ assert images is not None + assert images[0] is not None + feed[self.workspace_image_inputs] = images + return feed diff --git a/reward_data_manager.py b/reward_data_manager.py new file mode 100644 index 0000000..1996a74 --- /dev/null +++ b/reward_data_manager.py @@ -0,0 +1,253 @@ +import time +import numpy as np +import random +import os +import multiprocessing +import Queue +import pickle +import bz2 +from image_cache import ImageCache + +FREE_TRANSITION_STATUS = 1 +COLLISION_STATUS = 2 +GOAL_STATUS = 3 + + +class UnzipperProcess(multiprocessing.Process): + def __init__(self, number_of_unzippers, files_queue, result_queue, unzipper_specific_queue): + multiprocessing.Process.__init__(self) + self.number_of_unzippers = number_of_unzippers + self.files_queue = files_queue + self.result_queue = result_queue + self.unzipper_specific_queue = unzipper_specific_queue + + def run(self): + while True: + try: + if self.result_queue.empty(): + next_file = self.files_queue.get(block=True, timeout=1) + with open(next_file, 'r') as source_file: + result = pickle.load(source_file) + self.result_queue.put(result) + self.result_queue.task_done() + else: + time.sleep(1) + except Queue.Empty: + time.sleep(1) + pass + try: + # only option is to break + task_type = self.unzipper_specific_queue.get(block=True, timeout=0.001) + break + except Queue.Empty: + pass + + +class UnzipperIterator: + def __init__(self, number_of_unzippers, files): + self.number_of_unzippers = number_of_unzippers + self.files = files + + self.unzipper_specific_queues = [multiprocessing.JoinableQueue() for _ in range(number_of_unzippers)] + self.files_queue = multiprocessing.JoinableQueue() + self.results_queue = multiprocessing.JoinableQueue() + + self.unzippers = [UnzipperProcess( + number_of_unzippers, self.files_queue, self.results_queue, self.unzipper_specific_queues[i]) + for i in range(number_of_unzippers)] + + for u in self.unzippers: + u.start() + + def __iter__(self): + random.shuffle(self.files) + # put all the files + for f in self.files: + self.files_queue.put(f) + # when ready - collect the zipped files + for i in range(len(self.files)): + unzipped = self.results_queue.get() + yield unzipped + + def end(self): + for u in self.unzippers: + u.terminate() + time.sleep(10) + + +class RewardDataLoader: + def __init__(self, data_dir, status_to_read, number_of_unzippers=None): + assert os.path.exists(data_dir) + self.cache_dir = data_dir.replace('supervised_data', 'supervised_data_cache') + if not os.path.exists(self.cache_dir): + self._create_cache(data_dir, self.cache_dir) + + self.files = [os.path.join(self.cache_dir, f) + for f in os.listdir(self.cache_dir) + if f.endswith(".pkl") and f.startswith('{}_'.format(status_to_read))] + + self.files_iterator = None + if number_of_unzippers is not None: + self.files_iterator = UnzipperIterator(number_of_unzippers, self.files) + + @staticmethod + def _create_cache(data_dir, cache_dir): + print('creating cache for {} in {}'.format(data_dir, cache_dir)) + os.makedirs(cache_dir) + files = [f for f in os.listdir(data_dir) if f.endswith(".pkl")] + for f in files: + destination_file = os.path.join(cache_dir, f) + print('caching {}'.format(f)) + with bz2.BZ2File(os.path.join(data_dir, f), 'r') as compressed_file: + data = pickle.load(compressed_file) + with open(destination_file, 'w') as cache_file: + pickle.dump(data, cache_file) + print('done creating cache for {} in {}'.format(data_dir, cache_dir)) + + def __iter__(self): + random.shuffle(self.files) + if self.files_iterator is None: + 
for f in self.files: + with open(f, 'r') as source_file: + yield pickle.load(source_file) + else: + for content in self.files_iterator: + yield content + + def stop(self): + if self.files_iterator is not None: + self.files_iterator.end() + + +class Batcher: + def __init__(self, input_iterator, batch_size, shuffle_before_yield): + self.input_iterator = input_iterator + self.batch_size = batch_size + self.shuffle_before_yield = shuffle_before_yield + + def __iter__(self): + current_batch = [] + for tuple_list in self.input_iterator: + for t in tuple_list: + current_batch.append(t) + if len(current_batch) == self.batch_size: + if self.shuffle_before_yield: + random.shuffle(current_batch) + yield current_batch + current_batch = [] + if self.shuffle_before_yield: + random.shuffle(current_batch) + yield current_batch + + +class Oversampler: + def __init__(self, data_dir, free_class_batch_size, oversample_goal, oversample_collision, + shuffle_batch_multiplier=2, number_of_unzippers=None): + self.data_dir = data_dir + self.oversample_goal = oversample_goal + self.oversample_collision = oversample_collision + + # load data + self.all_collisions = self._load_all(RewardDataLoader(data_dir, 2, None)) + self.all_goals = self._load_all(RewardDataLoader(data_dir, 3, None)) + self.free_transitions_iterator = RewardDataLoader(data_dir, 1, number_of_unzippers=number_of_unzippers) + # self._describe_data(self.free_transitions_iterator) # TODO: UNCOMMENT THIS + + inner_batcher = Batcher(self.free_transitions_iterator, free_class_batch_size * shuffle_batch_multiplier, True) + self.batcher = Batcher(inner_batcher, free_class_batch_size, False) + + def _describe_data(self, free_transitions_iterator): + free_count = 0 + for tuple_list in free_transitions_iterator: + free_count += len(tuple_list) + collision_count = len(self.all_collisions) + goal_count = len(self.all_goals) + all_count = free_count + collision_count + goal_count + + print('data dir: {}'.format(self.data_dir)) + print('free: {} ({})'.format(free_count, float(free_count) / all_count)) + print('collision: {} ({})'.format(collision_count, float(collision_count) / all_count)) + print('goal: {} ({})'.format(goal_count, float(goal_count) / all_count)) + print('') + + @staticmethod + def _load_all(files_iterator): + all_transitions = [] + for tuple_list in files_iterator: + all_transitions.extend(tuple_list) + return all_transitions + + def _oversample_result(self, free_transition_batch): + batch_size = len(free_transition_batch) + goal_sample_size = int(self.oversample_goal * batch_size) + goal_indices = list(np.random.choice(len(self.all_goals), goal_sample_size)) + goal_current_batch = [self.all_goals[i] for i in goal_indices] + collision_sample_size = int(self.oversample_collision * batch_size) + collision_indices = list(np.random.choice(len(self.all_collisions), collision_sample_size)) + collision_current_batch = [self.all_collisions[i] for i in collision_indices] + return free_transition_batch + goal_current_batch + collision_current_batch + + def __iter__(self): + for free_transition_batch in self.batcher: + yield self._oversample_result(free_transition_batch) + + def stop(self): + self.free_transitions_iterator.stop() + + +def get_image_cache(config): + image_cache = None + scenario = config['general']['scenario'] + if 'vision' in scenario: + print("Loading image cache") + params_dir = os.path.abspath(os.path.join(os.getcwd(), "scenario_params", scenario)) + image_cache = ImageCache(params_dir) + return image_cache + + +def 
get_train_and_test_datasets(config, is_collision_model=False): + number_of_unzippers = config['general']['number_of_unzippers'] + batch_size = config['train']['batch_size'] + if is_collision_model: + oversample_goal = 0 + else: + oversample_goal = config['train']['oversample_goal'] + oversample_collision = config['train']['oversample_collision'] + scenario = config['general']['scenario'] + base_data_dir = os.path.join('data', 'supervised_data', scenario + '_by_status') + train_data_dir = os.path.join(base_data_dir, 'train') + test_data_dir = os.path.join(base_data_dir, 'test') + train = Oversampler(train_data_dir, batch_size, oversample_goal, oversample_collision, + number_of_unzippers=number_of_unzippers) + print("Loaded Train") + test = Oversampler(test_data_dir, batch_size, oversample_goal, oversample_collision, + number_of_unzippers=number_of_unzippers) + print("Loaded Test") + return train, test + + +def get_batch_and_labels(batch, image_cache): + all_start_joints = [] + all_actions = [] + all_status = [] + all_next_joints = [] + all_goal_joints = [] + all_images = None + if image_cache is not None: + all_images = [] + for i in range(len(batch)): + if image_cache is None: + workspace_id = None + start_joints, goal_joints, action, next_joints, reward, terminated, status = batch[i] + else: + workspace_id, start_joints, goal_joints, action, next_joints, reward, terminated, status = batch[i] + all_start_joints.append(start_joints[1:]) + all_actions.append(action[1:]) + all_next_joints.append(next_joints[1:]) + all_goal_joints.append(goal_joints[1:]) + all_status.append(status) + if image_cache is not None: + workspace_id = workspace_id.split(".")[0]+".pkl" + image = image_cache.items[workspace_id].np_array + all_images.append(image) + return [all_start_joints, all_actions, all_images, all_next_joints, all_goal_joints], all_status diff --git a/reward_model.py b/reward_model.py new file mode 100644 index 0000000..17aabab --- /dev/null +++ b/reward_model.py @@ -0,0 +1,146 @@ +from reward_collision_model import CollisionModel +import reward_data_manager +from reward_data_manager import get_train_and_test_datasets, get_batch_and_labels, get_image_cache +import time +import datetime +import numpy as np +import os +import yaml +import tensorflow as tf + + +class RewardModel: + + def __init__(self, model_name, config, models_base_dir, tensorboard_dir, trained_collision_model): + + self.model_name = model_name + self.config = config + + self.model_dir = os.path.join(models_base_dir, self.model_name) + if not os.path.exists(self.model_dir): + os.makedirs(self.model_dir) + + self.train_summaries = [] + self.test_summaries = [] + + self.collision_model = trained_collision_model + self.collision_prob = self.collision_model.collision_prob + + self.next_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='next_joints_inputs') + self.goal_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='goal_joints_inputs') + + self.states_probabilities = self.create_reward_logic(self.next_joints_inputs, self.goal_joints_inputs, self.collision_prob) + + self.status_input = tf.placeholder(tf.int32, [None, ]) + self.prediction = tf.argmax(self.states_probabilities, axis=1, output_type=tf.int32) + + self.test_measures = self.add_test_measures() + + with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd: + yaml.dump(config, fd) + + self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries) + + def predict(self, data_batch, session): + feed = 
diff --git a/reward_model.py b/reward_model.py
new file mode 100644
index 0000000..17aabab
--- /dev/null
+++ b/reward_model.py
@@ -0,0 +1,146 @@
+from reward_collision_model import CollisionModel
+import reward_data_manager
+from reward_data_manager import get_train_and_test_datasets, get_batch_and_labels, get_image_cache
+import time
+import datetime
+import numpy as np
+import os
+import yaml
+import tensorflow as tf
+
+
+class RewardModel:
+
+    def __init__(self, model_name, config, models_base_dir, tensorboard_dir, trained_collision_model):
+
+        self.model_name = model_name
+        self.config = config
+
+        self.model_dir = os.path.join(models_base_dir, self.model_name)
+        if not os.path.exists(self.model_dir):
+            os.makedirs(self.model_dir)
+
+        self.train_summaries = []
+        self.test_summaries = []
+
+        self.collision_model = trained_collision_model
+        self.collision_prob = self.collision_model.collision_prob
+
+        self.next_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='next_joints_inputs')
+        self.goal_joints_inputs = tf.placeholder(tf.float32, (None, 4), name='goal_joints_inputs')
+
+        self.states_probabilities = self.create_reward_logic(self.next_joints_inputs, self.goal_joints_inputs, self.collision_prob)
+
+        self.status_input = tf.placeholder(tf.int32, [None, ])
+        self.prediction = tf.argmax(self.states_probabilities, axis=1, output_type=tf.int32)
+
+        self.test_measures = self.add_test_measures()
+
+        with open(os.path.join(self.model_dir, 'config.yml'), 'w') as fd:
+            yaml.dump(config, fd)
+
+        self.test_board = self.TensorBoard(tensorboard_dir, 'test_' + model_name, self.test_summaries)
+
+    def predict(self, data_batch, session):
+        feed = self.collision_model.make_feed(data_batch)
+        return session.run([self.prediction], feed)[0]
+
+    def create_reward_logic(self, next_joints, goal_joints, collision_prob):
+        # epsilon: distance under which a configuration counts as "at the goal"
+        goal_sensitivity = self.config['model']['goal_configuration_distance_sensitivity']
+
+        # alpha: width of the margin over which "close to goal" decays from 1 to 0
+        alpha = 0.4
+
+        # euclidean distance between the next configuration and the goal
+        self.delta = tf.norm(next_joints - goal_joints, ord='euclidean', axis=1)
+
+        # 0 while delta <= epsilon, then grows linearly with delta
+        self.a = tf.maximum(self.delta - goal_sensitivity, 0)
+
+        # capped at alpha, so:
+        #   delta <= epsilon         -> b = 0
+        #   delta >= epsilon + alpha -> b = alpha
+        #   in between               -> linear
+        self.b = tf.minimum(self.a, alpha)
+
+        # e.g. with epsilon = 1.23 (the configured sensitivity) and alpha = 0.4:
+        # delta = 1.0 -> 1.0, delta = 1.43 -> 0.5, delta >= 1.63 -> 0.0
+        self.is_close_to_goal = 1 - (self.b / alpha)
+
+        self.is_close_to_goal = tf.expand_dims(self.is_close_to_goal, -1)
+
+        # the three class probabilities: a transition reaches the goal only if
+        # it is collision free and close to the goal
+        p_goal = (1 - collision_prob) * self.is_close_to_goal
+        p_free_space = (1 - collision_prob) * (1 - self.is_close_to_goal)
+
+        return tf.concat((p_free_space, collision_prob, p_goal), axis=1)
+
+    def add_test_measures(self):
+        labels = self.status_input - 1
+        accuracy = tf.reduce_mean(tf.cast(tf.equal(labels, self.prediction), tf.float32))
+        # TP = tf.count_nonzero((self.prediction) * (labels), dtype=tf.float32) + tf.Variable(0.001)
+        # TN = tf.count_nonzero((self.prediction - 1) * (labels - 1), dtype=tf.float32) + tf.Variable(0.001)
+        # FP = tf.count_nonzero(self.prediction * (labels - 1), dtype=tf.float32) + tf.Variable(0.001)
+        # FN = tf.count_nonzero((self.prediction - 1) * labels, dtype=tf.float32) + tf.Variable(0.001)
+        # precision = TP / (TP + FP)
+        # recall = TP / (TP + FN)
+        accuracy_summary = tf.summary.scalar('Accuracy', accuracy)
+        # recall_summary = tf.summary.scalar('Recall', recall)
+        # precision_summary = tf.summary.scalar('Precision', precision)
+        self.test_summaries += [accuracy_summary]
+        return [accuracy]
+
+    def _test_batch(self, test_batch, test_status_batch, session):
+        batch_start_joints, batch_actions, batch_images, batch_next_joints, batch_goal_joints = test_batch
+        test_feed = self.collision_model.make_feed((batch_start_joints, batch_actions, batch_images))
+        test_feed[self.status_input] = np.array(test_status_batch)
+        test_feed[self.next_joints_inputs] = batch_next_joints
+        test_feed[self.goal_joints_inputs] = batch_goal_joints
+        # compute summaries and diagnostics in a single pass over the graph
+        test_summary, measures, prediction, labels, probabilities = session.run(
+            [self.test_board.summaries, self.test_measures, self.prediction,
+             self.status_input - 1, self.states_probabilities],
+            test_feed)
+        print((measures, prediction, labels, probabilities))
+        self.test_board.writer.add_summary(test_summary)
+        self.test_board.writer.flush()
+
+    def test(self, test_data, image_cache, session):
+        session.run(tf.global_variables_initializer())
+        session.run(tf.local_variables_initializer())
+
+        self.collision_model.load(session)
+
+        test_batch = next(iter(test_data))  # random test batch
+        test_batch, test_status_batch = get_batch_and_labels(test_batch, image_cache)
+        self._test_batch(test_batch, test_status_batch, session)
+
+    class TensorBoard:
+
+        def __init__(self, tensorboard_path, board_name, summaries):
+            self.writer = tf.summary.FileWriter(os.path.join(tensorboard_path, board_name))
+            self.summaries = tf.summary.merge(summaries)
+
+
+if __name__ == '__main__':
+    # read the config
+    config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml')
+    with open(config_path, 'r') as yml_file:
+        config = yaml.load(yml_file)
print('------------ Config ------------') + print(yaml.dump(config)) + + models_base_dir = os.path.join('data', 'reward', 'model') + collision_model_name = "2019_08_31_09_14_58-collision-resnet-vision" + collision_model = CollisionModel(collision_model_name, config, models_base_dir, tensorboard_dir=models_base_dir) + + model_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') + reward_model = RewardModel(model_name, config, models_base_dir, models_base_dir, collision_model) + + train_data, test_data = get_train_and_test_datasets(config, is_collision_model=False) + image_cache = get_image_cache(config) + + gpu_usage = config['general']['gpu_usage'] + session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_usage)) + with tf.Session(config=session_config) as session: + reward_model.test(test_data, image_cache, session) + train_data.stop() + test_data.stop() diff --git a/train_supervised_reward.py b/train_supervised_reward.py deleted file mode 100644 index ba423a8..0000000 --- a/train_supervised_reward.py +++ /dev/null @@ -1,385 +0,0 @@ -from image_cache import ImageCache -from pre_trained_reward import * -from openrave_manager import OpenraveManager -from potential_point import PotentialPoint - -import time -import datetime -import numpy as np -import random -import os -import yaml -import tensorflow as tf -import multiprocessing -import Queue - - -class UnzipperProcess(multiprocessing.Process): - def __init__(self, number_of_unzippers, files_queue, result_queue, unzipper_specific_queue): - multiprocessing.Process.__init__(self) - self.number_of_unzippers = number_of_unzippers - self.files_queue = files_queue - self.result_queue = result_queue - self.unzipper_specific_queue = unzipper_specific_queue - - def run(self): - while True: - try: - if self.result_queue.qsize() < self.number_of_unzippers: - next_file = self.files_queue.get(block=True, timeout=1) - with open(next_file, 'r') as source_file: - result = pickle.load(source_file) - self.result_queue.put(result) - self.result_queue.task_done() - else: - time.sleep(1) - except Queue.Empty: - pass - try: - # only option is to break - task_type = self.unzipper_specific_queue.get(block=True, timeout=0.001) - break - except Queue.Empty: - pass - - -class UnzipperIterator: - def __init__(self, number_of_unzippers, files): - self.number_of_unzippers = number_of_unzippers - self.files = files - - self.unzipper_specific_queues = [multiprocessing.JoinableQueue() for _ in range(number_of_unzippers)] - self.files_queue = multiprocessing.JoinableQueue() - self.results_queue = multiprocessing.JoinableQueue() - - self.unzippers = [UnzipperProcess( - number_of_unzippers, self.files_queue, self.results_queue, self.unzipper_specific_queues[i]) - for i in range(number_of_unzippers)] - - for u in self.unzippers: - u.start() - - def __iter__(self): - random.shuffle(self.files) - # put all the files - for f in self.files: - self.files_queue.put(f) - # when ready - collect the zipped files - for i in range(len(self.files)): - unzipped = self.results_queue.get() - yield unzipped - - def end(self): - for u in self.unzippers: - u.terminate() - time.sleep(10) - - -class RewardDataLoader: - def __init__(self, data_dir, status_to_read, number_of_unzippers=None): - assert os.path.exists(data_dir) - cache_dir = data_dir.replace('supervised_data', 'supervised_data_cache') - if not os.path.exists(cache_dir): - self._create_cache(data_dir, cache_dir) - - files = [f for f in os.listdir(data_dir) if 
f.endswith(".pkl") and f.startswith('{}_'.format(status_to_read))] - assert len(files) > 0 - self.cache_dir = cache_dir - self.files = [os.path.join(self.cache_dir, f) for f in files] - - self.files_iterator = None - if number_of_unzippers is not None: - self.files_iterator = UnzipperIterator(number_of_unzippers, self.files) - - @staticmethod - def _create_cache(data_dir, cache_dir): - print 'creating cache for {} in {}'.format(data_dir, cache_dir) - os.makedirs(cache_dir) - files = [f for f in os.listdir(data_dir) if f.endswith(".pkl")] - for f in files: - destination_file = os.path.join(cache_dir, f) - print 'caching {}'.format(f) - with bz2.BZ2File(os.path.join(data_dir, f), 'r') as compressed_file: - data = pickle.load(compressed_file) - with open(destination_file, 'w') as cache_file: - pickle.dump(data, cache_file) - print 'done creating cache for {} in {}'.format(data_dir, cache_dir) - - def __iter__(self): - random.shuffle(self.files) - if self.files_iterator is None: - for f in self.files: - with open(f, 'r') as source_file: - yield pickle.load(source_file) - else: - for content in self.files_iterator: - yield content - - def stop(self): - if self.files_iterator is not None: - self.files_iterator.end() - - -class Batcher: - def __init__(self, input_iterator, batch_size, shuffle_before_yield): - self.input_iterator = input_iterator - self.batch_size = batch_size - self.shuffle_before_yield = shuffle_before_yield - - def __iter__(self): - current_batch = [] - for tuple_list in self.input_iterator: - for t in tuple_list: - current_batch.append(t) - if len(current_batch) == self.batch_size: - if self.shuffle_before_yield: - random.shuffle(current_batch) - yield current_batch - current_batch = [] - if self.shuffle_before_yield: - random.shuffle(current_batch) - yield current_batch - - -class Oversampler: - def __init__(self, data_dir, free_class_batch_size, oversample_goal, oversample_collision, - shuffle_batch_multiplier=2, number_of_unzippers=None): - self.data_dir = data_dir - self.oversample_goal = oversample_goal - self.oversample_collision = oversample_collision - - # load data - self.all_collisions = self._load_all(RewardDataLoader(data_dir, 2, None)) - self.all_goals = self._load_all(RewardDataLoader(data_dir, 3, None)) - self.free_transitions_iterator = RewardDataLoader(data_dir, 1, number_of_unzippers=number_of_unzippers) - self._describe_data(self.free_transitions_iterator) - - inner_batcher = Batcher(self.free_transitions_iterator, free_class_batch_size * shuffle_batch_multiplier, True) - self.batcher = Batcher(inner_batcher, free_class_batch_size, False) - - def _describe_data(self, free_transitions_iterator): - free_count = 0 - for tuple_list in free_transitions_iterator: - free_count += len(tuple_list) - collision_count = len(self.all_collisions) - goal_count = len(self.all_goals) - all_count = free_count + collision_count + goal_count - - print 'data dir: {}'.format(self.data_dir) - print 'free: {} ({})'.format(free_count, float(free_count) / all_count) - print 'collision: {} ({})'.format(collision_count, float(collision_count) / all_count) - print 'goal: {} ({})'.format(goal_count, float(goal_count) / all_count) - print '' - - @staticmethod - def _load_all(files_iterator): - all_transitions = [] - for tuple_list in files_iterator: - all_transitions.extend(tuple_list) - return all_transitions - - def _oversample_result(self, free_transition_batch): - batch_size = len(free_transition_batch) - goal_sample_size = int(self.oversample_goal * batch_size) - goal_indices = 
list(np.random.choice(len(self.all_goals), goal_sample_size)) - goal_current_batch = [self.all_goals[i] for i in goal_indices] - collision_sample_size = int(self.oversample_collision * batch_size) - collision_indices = list(np.random.choice(len(self.all_collisions), collision_sample_size)) - collision_current_batch = [self.all_collisions[i] for i in collision_indices] - return free_transition_batch + goal_current_batch + collision_current_batch - - def __iter__(self): - for free_transition_batch in self.batcher: - yield self._oversample_result(free_transition_batch) - - def stop(self): - self.free_transitions_iterator.stop() - - -model_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S') - -# read the config -config_path = os.path.join(os.getcwd(), 'config/reward_config.yml') -with open(config_path, 'r') as yml_file: - config = yaml.load(yml_file) - print('------------ Config ------------') - print(yaml.dump(config)) - - -epochs = config['general']['epochs'] -save_every_epochs = config['general']['save_every_epochs'] -batch_size = config['model']['batch_size'] - -initial_learn_rate = config['reward']['initial_learn_rate'] -decrease_learn_rate_after = config['reward']['decrease_learn_rate_after'] -learn_rate_decrease_rate = config['reward']['learn_rate_decrease_rate'] -oversample_goal = config['reward']['oversample_goal'] -oversample_collision = config['reward']['oversample_collision'] - -scenario = config['general']['scenario'] -# base_data_dir = os.path.join('supervised_data', scenario) -base_data_dir = os.path.join('supervised_data', scenario + '_by_status') -image_cache = None -if 'vision' in scenario: - params_dir = os.path.abspath(os.path.expanduser('~/ModelBasedDDPG/scenario_params/{}/'.format(scenario))) - image_cache = ImageCache(params_dir) -train_data_dir = os.path.join(base_data_dir, 'train') -test_data_dir = os.path.join(base_data_dir, 'test') - - -number_of_unzippers = config['general']['number_of_unzippers'] - -train = Oversampler(train_data_dir, batch_size, oversample_goal, oversample_collision, - number_of_unzippers=number_of_unzippers) -test = Oversampler(test_data_dir, batch_size, oversample_goal, oversample_collision, - number_of_unzippers=number_of_unzippers) - -# get openrave manager -openrave_manager = OpenraveManager(0.001, PotentialPoint.from_config(config)) - -# set summaries and saver dir -summaries_dir = os.path.join('reward', 'tensorboard') -train_summary_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'train_' + model_name)) -test_summary_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'test_' + model_name)) -saver_dir = os.path.join('reward', 'model', model_name) -if not os.path.exists(saver_dir): - os.makedirs(saver_dir) - -# save the config -config_copy_path = os.path.join(saver_dir, 'config.yml') -yaml.dump(config, open(config_copy_path, 'w')) - -# create the network -pre_trained_reward = PreTrainedReward(model_name, config) -reward_prediction = pre_trained_reward.reward_prediction -reward_input = tf.placeholder(tf.float32, [None, 1]) -# reward_loss = tf.reduce_mean(tf.square(reward_input - reward_prediction)) -reward_loss = tf.losses.mean_squared_error(labels=reward_input, predictions=reward_prediction) -status_prediction = pre_trained_reward.status_softmax_logits -status_input = tf.placeholder(tf.int32, [None, ]) -# status_loss = tf.reduce_mean( -# tf.nn.sparse_softmax_cross_entropy_with_logits(labels=status_input-1, logits=status_prediction) -# ) * config['reward']['cross_entropy_coefficient'] 
-status_loss = tf.losses.sparse_softmax_cross_entropy(labels=status_input-1, logits=status_prediction) * config[ - 'reward']['cross_entropy_coefficient'] -regularization_loss = tf.losses.get_regularization_loss() -total_loss = reward_loss + status_loss + regularization_loss - - -global_step = tf.Variable(0, trainable=False) -learning_rate = tf.train.exponential_decay( - initial_learn_rate, global_step, decrease_learn_rate_after, learn_rate_decrease_rate, staircase=True) -optimizer = tf.train.AdamOptimizer(learning_rate) -gradients, variables = zip(*optimizer.compute_gradients(total_loss, tf.trainable_variables())) -initial_gradients_norm = tf.global_norm(gradients) -gradient_limit = config['reward']['gradient_limit'] -if gradient_limit > 0.0: - gradients, _ = tf.clip_by_global_norm(gradients, gradient_limit, use_norm=initial_gradients_norm) -clipped_gradients_norm = tf.global_norm(gradients) -optimize_op = optimizer.apply_gradients(zip(gradients, variables), global_step=global_step) - -reward_loss_summary = tf.summary.scalar('reward_loss', reward_loss) -status_loss_summary = tf.summary.scalar('status_loss', status_loss) -regularization_loss_summary = tf.summary.scalar('regularization_loss_summary', regularization_loss) -total_loss_summary = tf.summary.scalar('total_loss_summary', total_loss) - -# summaries for the reward optimization -train_optimization_summaries = tf.summary.merge([ - reward_loss_summary, - status_loss_summary, - regularization_loss_summary, - total_loss_summary, - tf.summary.scalar('gradients_norm_initial', initial_gradients_norm), - tf.summary.scalar('gradients_norm_clipped', clipped_gradients_norm), - tf.summary.scalar('learn_rate', learning_rate) -]) - -test_optimization_summaries = tf.summary.merge([ - reward_loss_summary, - status_loss_summary, - regularization_loss_summary, - total_loss_summary, -]) - -goal_mean_rewards_error_input = tf.placeholder(tf.float32, name='goal_mean_rewards_error_input') -collision_mean_rewards_error_input = tf.placeholder(tf.float32, name='collision_mean_rewards_error_input') -other_mean_rewards_error_input = tf.placeholder(tf.float32, name='other_mean_rewards_error_input') -goal_max_rewards_error_input = tf.placeholder(tf.float32, name='goal_max_rewards_error_input') -collision_max_rewards_error_input = tf.placeholder(tf.float32, name='collision_max_rewards_error_input') -other_max_rewards_error_input = tf.placeholder(tf.float32, name='other_max_rewards_error_input') -goal_accuracy_input = tf.placeholder(tf.float32, name='goal_accuracy_input') -collision_accuracy_input = tf.placeholder(tf.float32, name='collision_accuracy_input') -other_accuracy_input = tf.placeholder(tf.float32, name='other_accuracy_input') - -test_summaries = tf.summary.merge([ - tf.summary.scalar('goal_mean_rewards_error', goal_mean_rewards_error_input), - tf.summary.scalar('collision_mean_rewards_error', collision_mean_rewards_error_input), - tf.summary.scalar('other_mean_rewards_error', other_mean_rewards_error_input), - tf.summary.scalar('goal_max_rewards_error', goal_max_rewards_error_input), - tf.summary.scalar('collision_max_rewards_error', collision_max_rewards_error_input), - tf.summary.scalar('other_max_rewards_error', other_max_rewards_error_input), - tf.summary.scalar('goal_accuracy', goal_accuracy_input), - tf.summary.scalar('collision_accuracy', collision_accuracy_input), - tf.summary.scalar('other_accuracy', other_accuracy_input), -]) - -with tf.Session( - config=tf.ConfigProto( - 
gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=config['general']['gpu_usage'])
-    )
-) as sess:
-    sess.run(tf.global_variables_initializer())
-    current_global_step = 0
-    for epoch in range(epochs):
-        # run train for one epoch
-        for train_batch in train:
-            train_batch, train_rewards, train_status = get_batch_and_labels(train_batch, openrave_manager, image_cache)
-            train_status_one_hot = np.zeros((len(train_rewards), 3), dtype=np.float32)
-            train_status_one_hot[np.arange(len(train_rewards)), np.array(train_status)-1] = 1.0
-            train_feed = pre_trained_reward.make_feed(*train_batch, all_transition_labels=train_status_one_hot)
-            train_feed[reward_input] = np.expand_dims(np.array(train_rewards), axis=1)
-            train_feed[status_input] = np.array(train_status)
-            train_total_loss, train_summary, current_global_step, _ = sess.run(
-                [total_loss, train_optimization_summaries, global_step, optimize_op], train_feed)
-            # if np.isnan(train_loss):
-            #     for b in train_batch:
-            #         print b
-            #     print train_rewards
-            #     exit()
-            train_summary_writer.add_summary(train_summary, current_global_step)
-            train_summary_writer.flush()
-
-        if current_global_step > 0:
-            # run test for one (random) batch
-            test_batch = None
-            for test_batch in test:
-                break
-            test_batch, test_rewards, test_status = get_batch_and_labels(test_batch, openrave_manager, image_cache)
-            test_feed = pre_trained_reward.make_feed(*test_batch)
-            test_feed[reward_input] = np.expand_dims(np.array(test_rewards), axis=1)
-            test_feed[status_input] = np.array(test_status)
-            test_reward_prediction, test_status_prediction, test_total_loss, test_summary = sess.run(
-                [reward_prediction, status_prediction, total_loss, test_optimization_summaries], test_feed)
-            test_summary_writer.add_summary(test_summary, current_global_step)
-            # see what happens for different reward classes:
-            goal_stats, collision_stats, other_stats = compute_stats_per_class(
-                test_status, test_rewards, test_status_prediction, test_reward_prediction)
-            test_summary_writer.add_summary(sess.run(test_summaries, {
-                goal_mean_rewards_error_input: goal_stats[0],
-                collision_mean_rewards_error_input: collision_stats[0],
-                other_mean_rewards_error_input: other_stats[0],
-                goal_max_rewards_error_input: goal_stats[1],
-                collision_max_rewards_error_input: collision_stats[1],
-                other_max_rewards_error_input: other_stats[1],
-                goal_accuracy_input: goal_stats[2],
-                collision_accuracy_input: collision_stats[2],
-                other_accuracy_input: other_stats[2],
-            }), current_global_step)
-            test_summary_writer.flush()
-        # save the model
-        if epoch % save_every_epochs == save_every_epochs-1:
-            pre_trained_reward.saver.save(sess, os.path.join(saver_dir, 'reward'), global_step=current_global_step)
-        print 'done epoch {} of {}, global step {}'.format(epoch, epochs, current_global_step)
-
-train.stop()
-test.stop()
diff --git a/vae_network.py b/vae_network.py
new file mode 100644
index 0000000..edc1908
--- /dev/null
+++ b/vae_network.py
@@ -0,0 +1,161 @@
+import tensorflow as tf
+from coordnet_model import coord_conv
+
+
+def get_flatten_shape(multi_dim_shape):
+    # number of features per example once the leading batch dimension is dropped
+    flat_shape = 1
+    for dim in multi_dim_shape.as_list()[1:]:
+        flat_shape *= dim
+    return flat_shape
+
+
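+# A quick sanity check of get_flatten_shape (illustrative only):
+#
+#     x = tf.placeholder(tf.float32, (None, 55, 111, 1))
+#     get_flatten_shape(x.shape)  # -> 55 * 111 * 1 = 6105
+
+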
+class VAENetwork:
+    def __init__(self, config, model_dir, input_shape):
+        self.prefix = 'vae'
+        self.config = config
+        # the VAE hyper-parameters live under the 'network' section of
+        # reward_config.yml (there is no 'reward' section), so read them there
+        self.latent_dim = self.config['network']['vae_latent_dim']
+        self.input_shape = input_shape.as_list()
+
+        assert(self.input_shape[0] is None)
+        self.input_shape[0] = -1
+        self.flat_input_shape = get_flatten_shape(input_shape)
+
+        self.workspace_image_inputs = tf.placeholder(tf.float32, input_shape, name='workspace_image_inputs')
+        self.images_3d = tf.expand_dims(self.workspace_image_inputs, axis=-1)
+
+        self.encoded, self.mean, self.std_dev = self.encode(self.images_3d, reuse_flag=False)
+        self.decoded = self.decode(self.encoded, reuse_flag=False)
+        self.epochs = 10
+
+    def predict(self, workspace_image, reuse_flag):
+        return self.encode(workspace_image, reuse_flag)[0]
+
+    def encode(self, workspace_image, reuse_flag):
+        if self.config['network']['use_coordnet']:
+            print("Using Coordnet")
+            workspace_image = coord_conv(55, 111, False, workspace_image, 32, 8, 4, padding='same',
+                                         activation=tf.nn.relu, use_bias=True,
+                                         name='{}_conv1'.format(self.prefix), _reuse=reuse_flag)
+            workspace_image = tf.Print(workspace_image, [workspace_image], message="my workspace_image:")
+
+        conv2 = tf.layers.conv2d(workspace_image, 64, 4, 2, padding='same', activation=tf.nn.relu, use_bias=True,
+                                 name='{}_conv2'.format(self.prefix), reuse=reuse_flag)
+        conv2 = tf.Print(conv2, [conv2], message="my encoded_conv2:")
+
+        # every layer is named and takes reuse_flag so predict() can rebuild
+        # the encoder over the same variables
+        conv3 = tf.layers.conv2d(conv2, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True,
+                                 name='{}_conv3'.format(self.prefix), reuse=reuse_flag)
+        conv4 = tf.layers.conv2d(conv3, 32, 8, 4, padding='same', activation=tf.nn.relu, use_bias=True,
+                                 name='{}_conv4'.format(self.prefix), reuse=reuse_flag)
+        flat = tf.layers.flatten(conv4, name='{}_flat'.format(self.prefix))
+
+        # remember the pre-flatten shapes so decode() can mirror them
+        self.encode_last_conv_shape = conv4.shape.as_list()
+        self.encode_last_conv_shape[0] = -1
+        self.encode_after_conv_flat_shape = flat.shape.as_list()
+
+        # kept as attributes for debugging
+        self.conv2 = conv2
+        self.workspace_image = workspace_image
+
+        dense1 = tf.layers.dense(flat, 512, activation=tf.nn.relu, name='{}_dense1'.format(self.prefix), reuse=reuse_flag)
+        dense2 = tf.layers.dense(dense1, 512, activation=tf.nn.relu, name='{}_dense2'.format(self.prefix), reuse=reuse_flag)
+        dense3 = tf.layers.dense(dense2, 512, activation=tf.nn.relu, name='{}_dense3'.format(self.prefix), reuse=reuse_flag)
+
+        # local latent variables
+        mean_ = tf.layers.dense(dense3, units=self.latent_dim, name='mean', reuse=reuse_flag)
+        std_dev = tf.nn.softplus(tf.layers.dense(dense3, units=self.latent_dim, name='std_dev_dense', reuse=reuse_flag),
+                                 name='std_dev')  # softplus to force > 0
+        mean_ = tf.Print(mean_, [mean_], message="my mean_:")
+        std_dev = tf.Print(std_dev, [std_dev], message="my std_dev:")
+        # reparametrization trick: sample z = mean + eps * std with eps ~ N(0, I)
+        epsilon = tf.random_normal(tf.stack([tf.shape(dense3)[0], self.latent_dim]), name='epsilon')
+        z = mean_ + tf.multiply(epsilon, std_dev)
+
+        return z, mean_, std_dev
+
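+    # Why the reparametrization trick above: sampling z ~ N(mean, std) directly
+    # is not differentiable w.r.t. mean and std, but z = mean + eps * std with
+    # eps ~ N(0, I) is. A pure-NumPy sketch of the same step (toy numbers,
+    # illustration only):
+    #
+    #     import numpy as np
+    #     mean, std = np.array([0.5, -1.0]), np.array([0.1, 0.2])
+    #     eps = np.random.randn(2)      # suppose eps == [0.3, -0.8]
+    #     z = mean + eps * std          # -> [0.53, -1.16]
+    #     # gradients flow through mean and std; eps carries the randomness
+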
+    def decode(self, encoded_layer, reuse_flag):
+        decoded_dense1 = tf.layers.dense(encoded_layer, 512, activation=tf.nn.relu, name='{}_decoded_dense1'.format(self.prefix), reuse=reuse_flag)
+        decoded_dense2 = tf.layers.dense(decoded_dense1, 512, activation=tf.nn.relu, name='{}_decoded_dense2'.format(self.prefix), reuse=reuse_flag)
+        decoded_dense3 = tf.layers.dense(decoded_dense2, self.encode_after_conv_flat_shape[1], activation=tf.nn.relu, name='{}_decoded_dense3'.format(self.prefix), reuse=reuse_flag)
+        decoded_before_conv = tf.reshape(decoded_dense3, self.encode_last_conv_shape)
+
+        decoded_conv4 = tf.layers.conv2d_transpose(decoded_before_conv, 32, 8, 4, padding='same', activation=tf.nn.relu, use_bias=True,
+                                                   name='{}_decoded_conv4'.format(self.prefix), reuse=reuse_flag)
+        decoded_conv3 = tf.layers.conv2d_transpose(decoded_conv4, 64, 3, 1, padding='same', activation=tf.nn.relu, use_bias=True,
+                                                   name='{}_decoded_conv3'.format(self.prefix), reuse=reuse_flag)
+        # the final layer is linear; the sigmoid below maps its logits into
+        # (0, 1) so the cross-entropy reconstruction loss is well defined
+        # (relu followed by sigmoid would pin every output above 0.5)
+        decoded_conv2 = tf.layers.conv2d_transpose(decoded_conv3, 64, 4, 2, padding='same', activation=None, use_bias=True,
+                                                   name='{}_decoded_conv2'.format(self.prefix), reuse=reuse_flag)
+        decoded_conv2 = tf.Print(decoded_conv2, [decoded_conv2], message="my decoded_conv2:")
+        # crop back to the input resolution and squash to (0, 1)
+        img = tf.slice(decoded_conv2, [0, 0, 0, 0], [-1, 55, 111, 1])
+        img = tf.nn.sigmoid(img)
+        img = tf.Print(img, [img], message="my decoded_img:")
+
+        return img
+
+    def get_loss(self):
+        # reshape input and output to flat vectors
+        flat_output = tf.reshape(self.decoded, [-1, get_flatten_shape(self.decoded.shape)])
+        flat_input = tf.reshape(self.images_3d, [-1, get_flatten_shape(self.images_3d.shape)])
+        # clip away from exactly 0 and 1 so the logs below stay finite
+        flat_output = tf.clip_by_value(flat_output, 1e-4, 1.0 - 1e-4)
+        flat_output = tf.Print(flat_output, [flat_output], message="my flat_output:")
+        flat_input = tf.Print(flat_input, [flat_input], message="my flat_input:")
+        with tf.name_scope('loss'):
+            # per-pixel binary cross entropy between input and reconstruction
+            img_loss_vec = flat_input * -tf.log(flat_output) + (1 - flat_input) * -tf.log(1 - flat_output)
+            img_loss_vec = tf.Print(img_loss_vec, [img_loss_vec], message="my img_loss_vec:")
+            img_loss = tf.reduce_sum(img_loss_vec, 1)
+            img_loss = tf.Print(img_loss, [img_loss], message="my img_loss:")
+            # KL divergence between N(mean, std) and the N(0, I) prior
+            latent_loss = 0.5 * tf.reduce_sum(tf.square(self.mean) + tf.square(self.std_dev) - tf.log(tf.square(self.std_dev)) - 1, 1)
+            latent_loss = tf.Print(latent_loss, [latent_loss], message="my latent_loss:")
+            self.total_loss = tf.reduce_mean(img_loss + latent_loss)
+            self.latent_loss = tf.reduce_mean(latent_loss)
+            self.img_loss = tf.reduce_mean(img_loss)
+
+        return self.img_loss, self.latent_loss, self.total_loss
+
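+# Sanity check for the loss terms above (toy numbers, illustration only).
+# For a single latent unit, KL = 0.5 * (mean^2 + std^2 - log(std^2) - 1):
+#
+#     import numpy as np
+#     0.5 * (0.0 ** 2 + 1.0 ** 2 - np.log(1.0 ** 2) - 1)  # matches prior -> 0.0
+#     0.5 * (1.0 ** 2 + 1.0 ** 2 - np.log(1.0 ** 2) - 1)  # -> 0.5
+#     -np.log(0.9)  # one pixel, target 1.0, reconstruction 0.9 -> ~0.105
+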
+
+import yaml
+import os
+import datetime
+import time
+from reward_data_manager import get_image_cache
+
+if __name__ == "__main__":
+    # read the config
+    config_path = os.path.join(os.getcwd(), 'data/config/reward_config.yml')
+    with open(config_path, 'r') as yml_file:
+        config = yaml.load(yml_file)
+        print('------------ Config ------------')
+        print(yaml.dump(config))
+
+    model_name = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
+
+    image_cache = get_image_cache(config)
+    batch_size = 100
+    images_data = [image.np_array for image in image_cache.items.values()]
+    images_batch_data = [images_data[i:i + batch_size] for i in range(0, len(images_data), batch_size)]
+
+    train_data_count = int(len(images_batch_data) * 0.8)
+    train_data = images_batch_data[:train_data_count]
+    test_data = images_batch_data[train_data_count:]
+
+    workspace_image_inputs = tf.placeholder(tf.float32, (None, 55, 111), name='workspace_image_inputs')
+    # note the argument order: VAENetwork(config, model_dir, input_shape)
+    model = VAENetwork(config, model_name, workspace_image_inputs.shape)
+    with tf.Session() as session:
+        # NOTE: the training loop is assumed to live outside this file (e.g.
+        # in image_vae_model.VAEModel); VAENetwork itself only builds the graph
+        model.train(train_data, test_data, session)
diff --git a/workspace_generation_utils.py b/workspace_generation_utils.py
index 58e32fb..920a48b 100755
--- a/workspace_generation_utils.py
+++ b/workspace_generation_utils.py
@@ -193,7 +193,7 @@ def __init__(self, print_info=True, min_obstacles=1, max_obstacles=3, min_center
 
     def _print_variable(self, name, variable):
         if self.print_info:
-            print name, variable
+            print(name, variable)
 
     def _randomize_obstacle_parameters(self):
         ray_angle = uniform(WorkspaceGenerator.min_angle, WorkspaceGenerator.max_angle)
@@ -296,7 +296,7 @@ def plan_start_goal(self, slices, max_planner_iterations):
 
     def _print_variable(self, name, variable):
         if self.print_info:
-            print name, variable
+            print(name, variable)
 
     def _get_valid_joints(self, slices, forbidden_slice=None):
         while True: