diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..567633c --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,126 @@ +name: CI/CD + +on: + push: + branches: + - 'master' + pull_request: + # Run daily at 0:01 UTC + schedule: + - cron: '1 0 * * *' + +jobs: + test: + name: "🐍 ${{ matrix.python-version }} • ${{ matrix.runs-on }}" + runs-on: ${{ matrix.runs-on }} + strategy: + matrix: + # just one for now + runs-on: [ubuntu-latest] + python-version: [3.6, 3.7] + + defaults: + run: + shell: "bash -l {0}" + + steps: + - uses: actions/checkout@master + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: 'Setup conda env' + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true + auto-activate-base: false + environment-file: conda_env.yml + activate-environment: DJC2 + + - name: "Check conda env" + run: | + conda env list + conda info + conda list + which python + python -V + + - name: Set env vars 1 + run: | + # normally one would `source docker_env.sh` but GHA can't do this + # It's not possible to chain these - an exported env var only becomes available in next step + echo "DEEPJETCORE=$( pwd -P)" >> $GITHUB_ENV + echo "PATH=$( pwd -P)/bin:$PATH" >> $GITHUB_ENV + echo "PYTHONPATH=$( pwd -P)/../:$PYTHONPATH" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=$( pwd -P)/compiled/" >> $GITHUB_ENV + # just hardcode it + echo "PYTHONPATH=/home/runner/work/DeepJetCore/DeepJetCore/bin:/home/runner/work/DeepJetCore/DeepJetCore:/home/runner/work/DeepJetCore:$PYTHONPATH" >> $GITHUB_ENV + + - name: Check env vars 1 + run: | + echo "DEEPJETCORE" + echo $DEEPJETCORE + echo "PATH" + echo $PATH + echo "PYTHONPATH" + echo $PYTHONPATH + echo "LD_LIBRARY_PATH" + echo $LD_LIBRARY_PATH + + - name: Compile + run: | + cd $DEEPJETCORE/compiled + make -f Makefile_conda -j4 + + - name: "Unit tests" + run: | + cd $DEEPJETCORE/testing/unit + python test.py + cd $DEEPJETCORE + + - name: Create subpackage + run: | + python bin/createSubpackage.py --data here + + - name: Set env vars 2 + run: | + # normally one would `source env.sh` but GHA can't do this + # It's not possible to chain these - an exported env var only becomes available in next step + echo "DEEPJETCORE_SUBPACKAGE=$DEEPJETCORE/here" >> $GITHUB_ENV + echo "PATH=$HERE/scripts:$PATH" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=$HERE/modules/compiled:$LD_LIBRARY_PATH" >> $GITHUB_ENV + # just hardcode it + echo "PYTHONPATH=/home/runner/work/DeepJetCore/DeepJetCore/here/modules:/home/runner/work/DeepJetCore/DeepJetCore/here/modules/datastructures:/home/runner/work/DeepJetCore/DeepJetCore/here/modules/compiled:$PYTHONPATH" >> $GITHUB_ENV + + - name: Check env vars 2 + run: | + echo "DEEPJETCORE_SUBPACKAGE" + echo $DEEPJETCORE_SUBPACKAGE + echo "PYTHONPATH" + echo $PYTHONPATH + echo "PATH" + echo $PATH + echo "LD_LIBRARY_PATH" + echo $LD_LIBRARY_PATH + + + - name: Convert files + run: | + cd $DEEPJETCORE + python bin/convertFromSource.py -i here/example_data/train_files.txt -o here/dctrain -c TrainData_example + + - name: "Run training" + run: | + # cd $DEEPJETCORE + python here/Train/training_example.py here/dctrain/dataCollection.djcdc here/training + + - name: "Run eval" + run: | + predict.py here/training/KERAS_model.h5 here/training/trainsamples.djcdc here/example_data/test_files.txt here/out_predict + + \ No
newline at end of file diff --git a/.gitignore b/.gitignore index 3b41cc9..595342c 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,9 @@ __pycache__ *.csv *.root *.json +.DS_Store #*.*# +build/ +*.egg-info/ +bin/make_example_data_overlap + diff --git a/DJCLayers.py b/DJCLayers.py new file mode 100644 index 0000000..1d86828 --- /dev/null +++ b/DJCLayers.py @@ -0,0 +1,163 @@ + +djc_global_layers_list={} + +from keras.layers import Layer +import tensorflow as tf + + + +class StopGradient(Layer): + def __init__(self, **kwargs): + super(StopGradient, self).__init__(**kwargs) + + def compute_output_shape(self, input_shape): + return input_shape + + def call(self, inputs): + return tf.stop_gradient(inputs) + +djc_global_layers_list['StopGradient']=StopGradient + +class SelectFeatures(Layer): + def __init__(self, index_left, index_right, **kwargs): + super(SelectFeatures, self).__init__(**kwargs) + self.index_left=index_left + self.index_right=index_right + + def compute_output_shape(self, input_shape): + return input_shape[:-1] + (self.index_right-self.index_left,) + + def call(self, inputs): + + return inputs[...,self.index_left:self.index_right] + + def get_config(self): + config = {'index_left': self.index_left,'index_right': self.index_right} + base_config = super(SelectFeatures, self).get_config() + return dict(list(base_config.items()) + list(config.items() )) + + +djc_global_layers_list['SelectFeatures']=SelectFeatures + + +class ScalarMultiply(Layer): + def __init__(self, factor, **kwargs): + super(ScalarMultiply, self).__init__(**kwargs) + self.factor=factor + + def compute_output_shape(self, input_shape): + return input_shape + + def call(self, inputs): + return inputs*self.factor + + def get_config(self): + config = {'factor': self.factor} + base_config = super(ScalarMultiply, self).get_config() + return dict(list(base_config.items()) + list(config.items() )) + + +djc_global_layers_list['ScalarMultiply']=ScalarMultiply + +class Print(Layer): + def __init__(self, message, **kwargs): + super(Print, self).__init__(**kwargs) + self.message=message + + def compute_output_shape(self, input_shape): + return input_shape + + def call(self, inputs): + return tf.Print(inputs,[inputs],self.message,summarize=300) + + def get_config(self): + config = {'message': self.message} + base_config = super(Print, self).get_config() + return dict(list(base_config.items()) + list(config.items() )) + + +djc_global_layers_list['Print']=Print + +### the following ones should go to DeepJetCore + +class ReplaceByNoise(Layer): + def __init__(self, **kwargs): + super(ReplaceByNoise, self).__init__(**kwargs) + + def compute_output_shape(self, input_shape): + return input_shape + + def call(self, inputs): + return tf.random_normal(shape=tf.shape(inputs), + mean=0.0, + stddev=1.0, + dtype='float32') + + + def get_config(self): + base_config = super(ReplaceByNoise, self).get_config() + return dict(list(base_config.items())) + + +djc_global_layers_list['ReplaceByNoise']=ReplaceByNoise + + + +class FeedForward(Layer): + def __init__(self, **kwargs): + super(FeedForward, self).__init__(**kwargs) + + def compute_output_shape(self, input_shape): + return input_shape + + def call(self, inputs): + return 1.*inputs + + + def get_config(self): + base_config = super(FeedForward, self).get_config() + return dict(list(base_config.items())) + + +djc_global_layers_list['FeedForward']=FeedForward + + +class Clip(Layer): + def __init__(self, min, max , **kwargs): + super(Clip, self).__init__(**kwargs) + self.min=min + self.max=max 
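+        # stores the clipping bounds; call() clips every element of the input tensor to the closed interval [min, max] via tf.clip_by_value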
+ + def compute_output_shape(self, input_shape): + return input_shape + + def call(self, inputs): + return tf.clip_by_value(inputs, self.min, self.max) + + def get_config(self): + config = {'min': self.min, 'max': self.max} + base_config = super(Clip, self).get_config() + return dict(list(base_config.items()) + list(config.items() )) + +djc_global_layers_list['Clip']=Clip + + +class ReduceSumEntirely(Layer): + def __init__(self, **kwargs): + super(ReduceSumEntirely, self).__init__(**kwargs) + + + def compute_output_shape(self, input_shape): + return (input_shape[0],1) + + def call(self, inputs): + red_axes=(inputs.shape[1:]).as_list() + red_axes = [i+1 for i in range(len(red_axes))] + return tf.expand_dims(tf.reduce_sum(inputs,axis=red_axes),axis=1) + + def get_config(self): + base_config = super(ReduceSumEntirely, self).get_config() + return dict(list(base_config.items())) + + +djc_global_layers_list['ReduceSumEntirely']=ReduceSumEntirely \ No newline at end of file diff --git a/DJCLosses.py b/DJCLosses.py new file mode 100644 index 0000000..a01bf4f --- /dev/null +++ b/DJCLosses.py @@ -0,0 +1,11 @@ + +djc_global_loss_list = {} + +import tensorflow as tf + +def dummy_loss(truth, pred): + #t = tf.Print(truth,[truth],'truth ') + #p = tf.Print(pred,[pred],'pred ') + return (tf.reduce_mean(truth)-tf.reduce_mean(pred))**2 + +djc_global_loss_list['dummy_loss']=dummy_loss \ No newline at end of file diff --git a/DataCollection.py b/DataCollection.py index e0098d6..5d3619d 100644 --- a/DataCollection.py +++ b/DataCollection.py @@ -3,73 +3,58 @@ @author: jkiesele ''' -#from tensorflow.contrib.labeled_tensor import batch -#from builtins import list -from __future__ import print_function -from Weighter import Weighter -from TrainData import TrainData, fileTimeOut -#for convenience -import logging -from pdb import set_trace -import copy - -usenewformat=True +from DeepJetCore.TrainData import TrainData +from DeepJetCore.dataPipeline import TrainDataGenerator +import tempfile +import pickle +import shutil +import os +import copy +import time +import logging +from DeepJetCore.stopwatch import stopwatch +logger = logging.getLogger(__name__) -# super not-generic without safety belts -#needs some revision -class BatchRandomInputGenerator(object): - def __init__(self, ranges, batchsize): - self.ranges=ranges - self.batchsize=batchsize - - def generateBatch(self): - import numpy as np - randoms=[] - for i in range(len(self.ranges)): - randoms.append(np.full((1,self.batchsize),np.random.uniform(self.ranges[i][0], self.ranges[i][1], size=1)[0])) - - nparr=np.dstack((randoms)) - return nparr.reshape(nparr.shape[1],nparr.shape[2]) class DataCollection(object): ''' classdocs ''' - - - def __init__(self, infile = None, nprocs = -1,useRelativePaths=True): + def __init__(self, infile = None, nprocs = -1): ''' Constructor ''' self.clear() - self.useRelativePaths=useRelativePaths - self.nprocs = nprocs - self.meansnormslimit=500000 + + self.istestdata=False + self.batch_uses_sum_of_squares=False + self.gen = None + self.__batchsize=1 + self.optionsdict={} + self.weighterobjects={} + self.batch_mode = False + self.nprocs=-1 + self.no_copy_on_convert=True + if infile: self.readFromFile(infile) - + if not len(self.samples): + raise Exception("no valid datacollection found in "+infile) + + + def setDataClass(self, dataclass): + self.dataclass = dataclass + self.dataclass_instance = self.dataclass() + def clear(self): self.samples=[] - self.sampleentries=[] - self.originRoots=[] - self.nsamples=0 + self.sourceList=[] 
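+        # sourceList keeps the original input files (e.g. root files) that each converted sample was produced from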
self.dataDir="" - self.useweights=True - self.__batchsize=1 - self.filesPreRead=2 - self.isTrain=True - self.dataclass=TrainData() #for future implementations - self.weighter=Weighter() - self.weightsfraction=0.05 - self.maxConvertThreads=2 - self.maxFilesOpen=2 - self.means=None - self.classweights={} - - def __len__(self): - return self.nsamples + self.dataclass = TrainData + self.dataclass_instance = self.dataclass() + self.__nsamples = 0 def __iadd__(self, other): 'A += B' @@ -80,32 +65,14 @@ def _extend_(a, b, name): _extend_(self, other, 'samples') if len(set(self.samples)) != len(self.samples): raise ValueError('The two DataCollections being summed contain the same files!') - _extend_(self, other, 'sampleentries') - _extend_(self, other, 'originRoots') - self.nsamples += other.nsamples + _extend_(self, other, 'sourceList') if self.dataDir != other.dataDir: raise ValueError('The two DataCollections have different data directories, still to be implemented!') - self.useweights = self.useweights and self.useweights - self.filesPreRead = min(self.filesPreRead, other.filesPreRead) - self.isTrain = self.isTrain and other.isTrain #arbitrary choice, could also raise exception - if type(self.dataclass) != type(other.dataclass): - raise ValueError( - 'The two DataCollections were made with a' - ' different data class type! (%s, and %s)' % (type(self.dataclass), type(other.dataclass)) - ) - if self.weighter != other.weighter: - raise ValueError( - 'The two DataCollections have different weights' - ) - if self.weightsfraction != other.weightsfraction: - raise ValueError('The two DataCollections have different weight fractions') - self.maxConvertThreads = min(self.maxConvertThreads, other.maxConvertThreads) - self.maxFilesOpen = min(self.maxFilesOpen, other.maxFilesOpen) - if not all(self.means == other.means): - raise ValueError( - 'The two DataCollections head different means' - ) - self.classweights.update(other.classweights) + #if type(self.dataclass) != type(other.dataclass): + # raise ValueError( + # 'The two DataCollections were made with a' + # ' different data class type! 
(%s, and %s)' % (type(self.dataclass), type(other.dataclass)) + # ) return self def __add__(self, other): @@ -125,273 +92,252 @@ def __radd__(self, other): else: raise ValueError("I don't know how to add DataCollection and %s" % type(other)) - def removeLast(self): - self.samples.pop() - self.nsamples-=self.sampleentries[-1] - self.sampleentries.pop() - self.originRoots.pop() + def __len__(self): + return len(self.samples) + + def _readMetaInfoIfNeeded(self): + if len(self.samples)<1: + return + if self.dataclass_instance is None: + self.dataclass_instance = self.dataclass() + if self.dataclass_instance.nElements() < 1: + self.dataclass_instance.readMetaDataFromFile(self.getSamplePath(self.samples[0])) + def _readNTotal(self): + if not len(self.samples): + return 0 + gen = trainDataGenerator() + gen.setFileList([self.dataDir+"/"+s for s in self.samples]) + return gen.getNTotal() - def getClassWeights(self): - if not len(self.classweights): - self.__computeClassWeights(self.dataclass.getUsedTruth()) - return self.classweights - def __computeClassWeights(self,truthclassesarray): - if not len(self.samples): - raise Exception("DataCollection:computeClassWeights: no sample files associated") - import copy - td=copy.deepcopy(self.dataclass) - td.readIn(self.getSamplePath(self.samples[0])) - arr=td.y[0] - average=0 - allist=[] - for i in range(arr.shape[1]): - entries=float((arr[:,i]>0).sum()) - average=average+entries - allist.append(entries) - - outdict={} - average=average/float((arr.shape[1])) - for i in range(len(allist)): - l=average/allist[i] - outdict[i]=l - self.classweights=outdict + def removeLast(self): + self.samples.pop() + self.sourceList.pop() - - def defineCustomPredictionLabels(self, labels): - self.dataclass.defineCustomPredictionLabels(labels) + def getNumpyFeatureShapes(self): + if len(self.samples)<1: + raise Exception("DataCollection.getNumpyFeatureShapes: no files") + return [] + self._readMetaInfoIfNeeded() + return self.dataclass_instance.getNumpyFeatureShapes() - def getCustomPredictionLabels(self): - if hasattr(self.dataclass, 'customlabels'): - return self.dataclass.customlabels - return None + def getNumpyFeatureDTypes(self): + if len(self.samples)<1: + raise Exception("DataCollection.getNumpyFeatureDTypes: no files") + return [] + self._readMetaInfoIfNeeded() + return self.dataclass_instance.getNumpyFeatureDTypes() - def getInputShapes(self): - ''' - gets the input shapes from the data class description - ''' + + def getNumpyFeatureArrayNames(self): if len(self.samples)<1: + raise Exception("DataCollection.getNumpyFeatureNames: no files") return [] - self.dataclass.filelock=None - td=copy.deepcopy(self.dataclass) - td.readIn(self.getSamplePath(self.samples[0]),shapesOnly=True) - shapes=td.getInputShapes() - td.clear() - return shapes + self._readMetaInfoIfNeeded() + return self.dataclass_instance.getNumpyFeatureArrayNames() - def getTruthShape(self): - return self.dataclass.getTruthShapes() - - def getNRegressionTargets(self): - return (self.dataclass.getNRegressionTargets()) - def getNClassificationTargets(self): - return (self.dataclass.getNClassificationTargets()) + def getKerasFeatureDTypes(self): + print('DataCollection.getKerasFeatureDTypes: deprecation warning, use getNumpyFeatureArrayNames') + return self.getNumpyFeatureDTypes() + + def getKerasFeatureShapes(self): + print('DataCollection.getKerasFeatureShapes: deprecation warning, use getNumpyFeatureArrayNames') + return self.getNumpyFeatureShapes() + + def getKerasFeatureArrayNames(self): + 
print('DataCollection.getKerasFeatureArrayNames: deprecation warning, use getNumpyFeatureArrayNames') + return self.getNumpyFeatureArrayNames() - def getUsedTruth(self): - return self.dataclass.getUsedTruth() + def getInputShapes(self): + print('DataCollection:getInputShapes deprecated, use getNumpyFeatureShapes ') + return self.getNumpyFeatureShapes() + def setBatchSize(self,bsize): - if bsize > self.nsamples: - raise Exception('Batch size must not be bigger than total sample size') self.__batchsize=bsize def getBatchSize(self): return self.__batchsize - - @property - def batch_size(self): - return self.__batchsize + - def getSamplesPerEpoch(self): - #modify by batch split - count=self.getNBatchesPerEpoch() - if count != 1: - return count*self.__batchsize #final - else: - return self.nsamples - - def getAvEntriesPerFile(self): - return float(self.nsamples)/float(len(self.samples)) - - def getNBatchesPerEpoch(self): - if self.__batchsize <= 1: - return 1 - count=0 - while (count+1)*self.__batchsize <= self.nsamples: - count+=1 - return count - - def writeToFile(self,filename): - import pickle - fd=open(filename,'wb') - self.dataclass.clear() - pickle.dump(self.samples, fd,protocol=0 ) - pickle.dump(self.sampleentries, fd,protocol=0 ) - pickle.dump(self.originRoots, fd,protocol=0 ) - pickle.dump(self.nsamples, fd,protocol=0 ) - pickle.dump(self.useweights, fd,protocol=0 ) - pickle.dump(self.__batchsize, fd,protocol=0 ) - pickle.dump(self.dataclass, fd,protocol=0 ) - pickle.dump(self.weighter, fd,protocol=0 ) - #pickle.dump(self.means, fd,protocol=0 ) - self.means.dump(fd) - fd.close() + def validate(self, remove=True, skip_first=0): + ''' + checks if all samples in the collection can be read properly. + removes the invalid samples from the sample list. 
+ Also removes the original link to the root file, so recover cannot be run + (this might be changed in future implementations) + ''' + validsourcelist = len(self.samples) == len(self.sourceList) + newsamples=[] + newsources=[] + for i in range(len(self.samples)): + if i < skip_first: continue + td = self.dataclass () + fullpath=self.getSamplePath(self.samples[i]) + print('reading '+fullpath, str(i), '/', str(len(self.samples))) + try: + td.readFromFile(fullpath) + if hasattr(td, "isValid"): + if not td.isValid(): + raise Exception("data validation failed for "+fullpath) + if td.nElements() < 1: + print("warning, no data in file "+fullpath) + del td + newsamples.append(self.samples[i]) + if validsourcelist: + newsources.append(self.sourceList[i]) + continue + except Exception as e: + print('problem with file, removing ', fullpath) + + self.samples = newsamples + if validsourcelist: + self.sourceList = newsources + + def removeEntry(self,relative_path_to_entry): + for i in range(len(self.samples)): + if relative_path_to_entry==self.samples[i]: + print('removing '+self.samples[i]) + del self.samples[i] + del self.sourceList[i] + break + + + def writeToFile(self,filename,abspath=False): + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as fd: + if not abspath: + pickle.dump(self.samples, fd,protocol=0 ) + else: + pickle.dump([self.getSamplePath(s) for s in self.samples], fd,protocol=0 ) + pickle.dump(self.sourceList, fd,protocol=0 ) + pickle.dump(self.dataclass, fd,protocol=0 ) + pickle.dump(self.weighterobjects, fd, protocol=0) + pickle.dump(self.__batchsize, fd, protocol=0) + pickle.dump(self.batch_uses_sum_of_squares, fd, protocol=0) + pickle.dump(self.optionsdict, fd, protocol=0) + + shutil.move(fd.name, filename) + os.chmod(filename, 0o644) def readFromFile(self,filename): - import pickle fd=open(filename,'rb') self.samples=pickle.load(fd) - self.sampleentries=pickle.load(fd) - self.originRoots=pickle.load(fd) - self.nsamples=pickle.load(fd) - self.useweights=pickle.load(fd) - self.__batchsize=pickle.load(fd) - self.dataclass=pickle.load(fd) - self.weighter=pickle.load(fd) - self.means=pickle.load(fd) - fd.close() - import os + self.sourceList=pickle.load(fd) + try: + self.dataclass=pickle.load(fd) + self.weighterobjects=pickle.load(fd) + self.__batchsize = pickle.load(fd) + self.batch_uses_sum_of_squares = pickle.load(fd) + self.optionsdict = pickle.load(fd) + except Exception as e: + print(e) + print("WARNING: wrong dataCollection format. Can still be used for training, but it is advised to recreate it: this is possible without converting the original data again using the script createDataCollectionFromTD.py (takes a few seconds)\nBookkeeping (e.g.
for predict) will be broken unless data collection is updated to new format.") + finally: + fd.close() + self.dataDir=os.path.dirname(os.path.abspath(filename)) self.dataDir+='/' - #don't check if files exist - return - for f in self.originRoots: - if not f.endswith(".root"): continue - if not os.path.isfile(f): - print('not found: '+f) - raise Exception('original root file not found') - for f in self.samples: - fpath=self.getSamplePath(f) - if not os.path.isfile(fpath): - print('not found: '+fpath) - raise Exception('sample file not found') - - - def readRootListFromFile(self,file): - import os + + def readSourceListFromFile(self, file, relpath='', checkfiles=False): self.samples=[] - self.sampleentries=[] - self.originRoots=[] - self.nsamples=0 + self.sourceList=[] + self.__nsamples=0 self.dataDir="" + td=self.dataclass() + fdir=os.path.dirname(file) fdir=os.path.abspath(fdir) fdir=os.path.realpath(fdir) - lines = [line.rstrip('\n') for line in open(file)] + lines = [(line.rstrip('\n')).rstrip(' ') for line in open(file)] for line in lines: if len(line) < 1: continue - if self.useRelativePaths: - self.originRoots.append(fdir+'/'+line) + if relpath: + self.sourceList.append(os.path.join(relpath, line)) else: - self.originRoots.append(line) - - if len(self.originRoots)<1: - raise Exception('root samples list empty') + self.sourceList.append(line) + if len(self.sourceList)<1: + raise Exception('source samples list empty') + + if checkfiles: + print('DataCollection: checking files') + self.sourceList=self.checkSourceFiles() + + def checkSourceFiles(self): + td=self.dataclass() + newsamples=[] + for s in self.sourceList: + logger.info('checking '+self.getSamplePath(s)) + if td.fileIsValid(self.getSamplePath(s)): + newsamples.append(s) + else: + print('source file '+s+' seems to be broken, will skip processing it') + + return newsamples def split(self,ratio): ''' - ratio is self/(out+self) + out fraction is (1-ratio) returns out - modifies itself + modifies self ''' - import copy + nin = int(len(self.samples)*(ratio)) + if nin < 1: + raise ValueError("DataCollection:split: less than one sample would remain") + if nin == len(self.samples): + raise ValueError("DataCollection:split: less than one sample would be assigned to output") + out=DataCollection() - itself=copy.deepcopy(self) - - nsamplefiles=len(self.samples) - - out.samples=[] - out.sampleentries=[] - out.originRoots=[] - out.nsamples=0 - out.__batchsize=copy.deepcopy(self.__batchsize) - out.isTrain=copy.deepcopy(self.isTrain) - out.dataDir=self.dataDir - - out.dataclass=copy.deepcopy(self.dataclass) - out.weighter=self.weighter #ref oks - out.means=self.means - out.useweights=self.useweights - - - itself.samples=[] - itself.sampleentries=[] - itself.originRoots=[] - itself.nsamples=0 - - - - if nsamplefiles < 2: - out=copy.deepcopy(self) - print('DataCollection.split: warning: only one file, split will just return a copy of this') - return out - - for i in range(0, nsamplefiles): - frac=(float(i))/(float(nsamplefiles)) - if frac < ratio and i < nsamplefiles-1: - itself.samples.append(self.samples[i]) - itself.sampleentries.append(self.sampleentries[i]) - itself.originRoots.append(self.originRoots[i]) - itself.nsamples+=self.sampleentries[i] - else: - out.samples.append(self.samples[i]) - out.sampleentries.append(self.sampleentries[i]) - out.originRoots.append(self.originRoots[i]) - out.nsamples+=self.sampleentries[i] - + out.dataDir = self.dataDir + out.dataclass = self.dataclass #anyway just a dummy + out.samples = self.samples[nin:] 
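+        # e.g. split(0.9): self keeps the first 90% of the sample files, the returned collection receives the remaining 10%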
+ self.samples = self.samples[:nin] + + if len(self.sourceList) == len(self.samples): + out.sourceList = self.sourceList[nin:] + self.sourceList = self.sourceList[:nin] + else: + self.sourceList = [] + out.sourceList = [] + #force re-read upon request + self.__nsamples = 0 + out.__nsamples = 0 - self.samples=itself.samples - self.sampleentries=itself.sampleentries - self.originRoots=itself.originRoots - self.nsamples=itself.nsamples + out.weighterobjects = copy.deepcopy(self.weighterobjects) return out - - - def createTestDataForDataCollection( - self, collectionfile, inputfile, - outputDir, outname = 'dataCollection.dc', - batch_mode = False): - import copy - self.readFromFile(collectionfile) - self.dataclass.remove=False - self.dataclass.weight=True #False - self.readRootListFromFile(inputfile) - self.createDataFromRoot( - self.dataclass, outputDir, False, - dir_check = not batch_mode - ) - self.writeToFile(outputDir+'/'+outname) def recoverCreateDataFromRootFromSnapshot(self, snapshotfile): - import os snapshotfile=os.path.abspath(snapshotfile) self.readFromFile(snapshotfile) - td=self.dataclass - #For emergency recover td.reducedtruthclasses=['isB','isC','isUDSG'] - if len(self.originRoots) < 1: + + if len(self.sourceList) < 1: return - #if not self.means: - # self.means=td.produceMeansFromRootFile(self.originRoots[0]) outputDir=os.path.dirname(snapshotfile)+'/' self.dataDir=outputDir finishedsamples=len(self.samples) self.__writeData_async_andCollect(finishedsamples,outputDir) - self.writeToFile(outputDir+'/dataCollection.dc') + self.writeToFile(outputDir+'/dataCollection.djcdc') + def getAllLabels(self,nfiles=-1): + return self.extract_features(self.dataclass,'y',nfiles) + + def getAllFeatures(self,nfiles=-1): + return self.extract_features(self.dataclass,'x',nfiles) + def getAllWeights(self,nfiles=-1): + return self.extract_features(self.dataclass,'w',nfiles) + def createDataFromRoot( self, dataclass, outputDir, redo_meansandweights=True, means_only=False, dir_check=True @@ -404,177 +350,151 @@ def createDataFromRoot( to recover the data until a possible error occurred ''' - if len(self.originRoots) < 1: + if len(self.sourceList) < 1: print('createDataFromRoot: no input root file') raise Exception('createDataFromRoot: no input root file') - import os outputDir+='/' if os.path.isdir(outputDir) and dir_check: raise Exception('output dir must not exist') elif not os.path.isdir(outputDir): os.mkdir(outputDir) self.dataDir=outputDir - self.nsamples=0 self.samples=[] - self.sampleentries=[] - import copy - self.dataclass=copy.deepcopy(dataclass) - td=self.dataclass - ##produce weighter from a larger dataset as one file - - if redo_meansandweights and (td.remove or td.weight): - logging.info('producing weights and remove indices') - self.weighter = td.produceBinWeighter( - self.originRoots - ) - self.weighter.printHistos(outputDir) - - if redo_meansandweights: - logging.info('producing means and norms') - self.means = td.produceMeansFromRootFile( - self.originRoots, limit=self.meansnormslimit - ) - - if means_only: return - self.__writeData_async_andCollect(0,outputDir) - - + self.dataclass=dataclass + td=self.dataclass() + + self.weighterobjects = td.createWeighterObjects(self.sourceList) + + if self.batch_mode: + for sample in self.sourceList: + self.__writeData(sample, outputDir) + else: + self.__writeData_async_andCollect(0, outputDir) - def __writeData(self,sample,means, weighter,outputDir,dataclass,number=-1): - import os - import copy - from stopwatch import stopwatch + def 
__writeData(self, sample, outputDir): sw=stopwatch() - td=copy.deepcopy(dataclass) + td=self.dataclass() fileTimeOut(sample,120) #once available copy to ram - ramdisksample= '/dev/shm/'+str(os.getpid())+os.path.basename(sample) + + sbasename = os.path.basename(sample) + newname = sbasename[:sbasename.rfind('.')]+'.djctd' + newpath=os.path.abspath(outputDir+newname) - def removefile(): - os.system('rm -f '+ramdisksample) + td.writeFromSourceFile(sample, self.weighterobjects, istraining=not self.istestdata, outname=newpath) - import atexit - atexit.register(removefile) + print('converted and written '+newname+' in ',sw.getAndReset(),' sec') + self.samples.append(newname) + td.clear() - os.system('cp '+sample+' '+ramdisksample) - try: - td.readFromRootFile(ramdisksample,means, weighter) - newname=os.path.basename(sample).rsplit('.', 1)[0] - if number>0: - newname+=str(number) + if not self.batch_mode: + self.writeToFile(outputDir+'/snapshot.djcdc') - if usenewformat: - newname+='.meta' - else: - newname+='.z' - newpath=os.path.abspath(outputDir+newname) - td.writeOut(newpath) - print('converted and written '+newname+' in ',sw.getAndReset(),' sec') - self.samples.append(newname) - self.nsamples+=td.nsamples - self.sampleentries.append(td.nsamples) - td.clear() - self.writeToFile(outputDir+'/snapshot.dc') - except Exception as e: - removefile() - raise e - removefile() - def __writeData_async_andCollect(self, startindex, outputDir): - #set tree name to use - import DeepJetCore.preprocessing - DeepJetCore.preprocessing.setTreeName(self.dataclass.treename) + td=self.dataclass() + + if self.nprocs == 1 or (hasattr(td, "no_fork") and td.no_fork):#no need to fork + for sample in self.sourceList: + + sbasename = os.path.basename(sample) + newname = sbasename[:sbasename.rfind('.')]+'.djctd' + newpath=os.path.abspath(outputDir+newname) + + logger.info('convertFromSourceFile'+sample) + td.writeFromSourceFile(sample, self.weighterobjects, istraining = not self.istestdata, outname=newpath) + self.samples.append(newname) + self.writeToFile(outputDir+'/snapshot.djcdc') + td.clear() + + self.writeToFile(outputDir+'/dataCollection.djcdc') + return + from multiprocessing import Process, Queue, cpu_count, Lock wo_queue = Queue() writelock=Lock() - import os thispid=str(os.getpid()) - if not os.path.isfile(outputDir+'/snapshot.dc'): - self.writeToFile(outputDir+'/snapshot.dc') + if not self.batch_mode and not os.path.isfile(outputDir+'/snapshot.djcdc'): + self.writeToFile(outputDir+'/snapshot.djcdc') tempstoragepath='/dev/shm/'+thispid - print('creating dir '+tempstoragepath) + logger.info('creating dir '+tempstoragepath) os.system('mkdir -p '+tempstoragepath) def writeData_async(index,woq,wrlck): + + logger.info('async started') - import copy - from stopwatch import stopwatch sw=stopwatch() - td=copy.deepcopy(self.dataclass) - sample=self.originRoots[index] - ramdisksample= tempstoragepath+'/'+str(os.getpid())+os.path.basename(sample) - - def removefile(): - os.system('rm -f '+ramdisksample) - - import atexit - atexit.register(removefile) + td=self.dataclass() + sample=self.sourceList[index] + + if self.batch_mode or self.no_copy_on_convert: + tmpinput = sample + + def removefile(): + pass + else: + tmpinput = tempstoragepath+'/'+str(os.getpid())+'_tmp_'+os.path.basename(sample) + + def removefile(): + os.system('rm -f '+tmpinput) + + import atexit + atexit.register(removefile) + + logger.info('start cp') + os_ret=os.system('cp '+sample+' '+tmpinput) + if os_ret: + raise Exception("copy to ramdisk not 
successful for "+sample) + success=False out_samplename='' out_sampleentries=0 - newname=os.path.basename(sample).rsplit('.', 1)[0] - newname+=str(index) - - if usenewformat: - newname+='.meta' - else: - newname+='.z' + sbasename = os.path.basename(sample) + newname = sbasename[:sbasename.rfind('.')]+'.djctd' newpath=os.path.abspath(outputDir+newname) - - try: - fileTimeOut(sample,120) #once available copy to ram - os.system('cp '+sample+' '+ramdisksample) - td.readFromRootFile(ramdisksample,self.means, self.weighter) - #wrlck.acquire() - td.writeOut(newpath) - #wrlck.release() + logger.info('convertFromSourceFile') + td.writeFromSourceFile(tmpinput, self.weighterobjects, istraining = not self.istestdata, outname=newpath) print('converted and written '+newname+' in ',sw.getAndReset(),' sec -', index) out_samplename=newname - out_sampleentries=td.nsamples + out_sampleentries=1 success=True td.clear() removefile() woq.put((index,[success,out_samplename,out_sampleentries])) - except: print('problem in '+newname) removefile() woq.put((index,[False,out_samplename,out_sampleentries])) raise - - - def __collectWriteInfo(successful,samplename,sampleentries,outputDir): if not successful: raise Exception("write not successful, stopping") - import os + self.samples.append(samplename) - self.nsamples+=sampleentries - self.sampleentries.append(sampleentries) - self.writeToFile(outputDir+'/snapshot_tmp.dc')#avoid to overwrite directly - os.system('mv '+outputDir+'/snapshot_tmp.dc '+outputDir+'/snapshot.dc') + if not self.batch_mode: + self.writeToFile(outputDir+'/snapshot_tmp.djcdc')#avoid to overwrite directly + os.system('mv '+outputDir+'/snapshot_tmp.djcdc '+outputDir+'/snapshot.djcdc') processes=[] processrunning=[] processfinished=[] - for i in range(startindex,len(self.originRoots)): + for i in range(startindex,len(self.sourceList)): processes.append(Process(target=writeData_async, args=(i,wo_queue,writelock) ) ) processrunning.append(False) processfinished.append(False) nchilds = int(cpu_count()/2)-2 if self.nprocs <= 0 else self.nprocs - #import os #if 'nvidiagtx1080' in os.getenv('HOSTNAME'): # nchilds=cpu_count()-5 if nchilds<1: @@ -587,7 +507,7 @@ def __collectWriteInfo(successful,samplename,sampleentries,outputDir): lastindex=startindex-1 alldone=False results=[] - import time + try: while not alldone: nrunning=0 @@ -600,7 +520,7 @@ def __collectWriteInfo(successful,samplename,sampleentries,outputDir): if processrunning[i]:continue if processfinished[i]:continue time.sleep(0.1) - logging.info('starting %s...' % self.originRoots[startindex+i]) + logging.info('starting %s...' % self.sourceList[startindex+i]) processes[i].start() processrunning[i]=True nrunning+=1 @@ -611,18 +531,18 @@ def __collectWriteInfo(successful,samplename,sampleentries,outputDir): res=wo_queue.get() results.append(res) originrootindex=res[0] - logging.info('finished %s...' % self.originRoots[originrootindex]) + logging.info('finished %s...' 
% self.sourceList[originrootindex]) processfinished[originrootindex-startindex]=True processes [originrootindex-startindex].join(5) processrunning [originrootindex-startindex]=False #immediately send the next continue - + results = sorted(results, key=lambda x:x[0]) for r in results: thisidx=r[0] if thisidx==lastindex+1: - logging.info('>>>> collected result %d of %d' % (thisidx,len(self.originRoots))) + logging.info('>>>> collected result %d of %d' % (thisidx+1,len(self.sourceList))) __collectWriteInfo(r[1][0],r[1][1],r[1][2],outputDir) lastindex=thisidx @@ -636,386 +556,90 @@ def __collectWriteInfo(successful,samplename,sampleentries,outputDir): raise os.system('rm -rf '+tempstoragepath) - def convertListOfRootFiles( - self, inputfile, dataclass, outputDir, - takemeansfrom='', means_only = False, - output_name = 'dataCollection.dc', batch_mode = False): + def convertListOfRootFiles(self, inputfile, dataclass, outputDir, + takeweightersfrom='', means_only=False, + output_name='dataCollection.djcdc', + relpath='', checkfiles=False): + newmeans=True - if takemeansfrom: - self.readFromFile(takemeansfrom) + if takeweightersfrom: + self.readFromFile(takeweightersfrom) newmeans=False - self.readRootListFromFile(inputfile) + self.dataclass = dataclass + self.readSourceListFromFile(inputfile, relpath=relpath,checkfiles=checkfiles) self.createDataFromRoot( dataclass, outputDir, newmeans, means_only = means_only, - dir_check= not batch_mode + dir_check= not self.batch_mode ) self.writeToFile(outputDir+'/'+output_name) - - def getAllLabels(self): - return self.__stackData(self.dataclass,'y') - - def getAllFeatures(self): - return self.__stackData(self.dataclass,'x') - - def getAllWeights(self): - return self.__stackData(self.dataclass,'w') - + def getSamplePath(self,samplefile): #for backward compatibility if samplefile[0] == '/': return samplefile return self.dataDir+'/'+samplefile - - def __stackData(self, dataclass, selector): - import numpy - td=dataclass - out=[] + + def extract_features(self, dataclass, selector,nfiles): + import numpy as np + td=self.dataclass() firstcall=True + count = 0 for sample in self.samples: - td.readIn(self.getSamplePath(sample)) + count+=1; + td.readFromFile(self.getSamplePath(sample)) #make this generic thislist=[] if selector == 'x': - thislist=td.x + thislist=td.transferFeatureListToNumpy(False) if selector == 'y': - thislist=td.y + thislist=td.transferTruthListToNumpy(False) if selector == 'w': - thislist=td.w - + thislist=td.transferWeightListToNumpy(False) if firstcall: out=thislist firstcall=False else: for i in range(0,len(thislist)): - if selector == 'w': - out[i] = numpy.append(out[i],thislist[i]) + if len(thislist[i].shape) > 1: + out[i] = np.vstack( (out[i], thislist[i] ) ) else: - out[i] = numpy.vstack((out[i],thislist[i])) - + out[i] = np.append(out[i],thislist[i]) + if nfiles > 0: + if count > nfiles: + break return out + + def __stackData(self, dataclass, selector): + td=self.dataclass() + out=[] + firstcall=True + for sample in self.samples: + td2 = self.dataclass() + td2.readFromFile(self.getSamplePath(sample)) + td.append(td2) + return td - + def invokeGenerator(self, *args, **kwargs): + generator = TrainDataGenerator( *args, + cast_to=self.dataclass, + **kwargs) + generator.setBatchSize(self.__batchsize) + generator.setSquaredElementsLimit(self.batch_uses_sum_of_squares) + generator.setFileList([self.dataDir+ "/" + s for s in self.samples]) + return generator + def getExampleFeatureBatch(self): + if len(self.samples)<1: + raise 
RuntimeError("getExampleBatch: only works if there is at least one sample in the data collection.") + td = self.dataclass() + td.readFromFile(self.getSamplePath(self.samples[0])) + td.skim(0) + return td.transferFeatureListToNumpy(False) - def generator(self): - import numpy - import copy - from sklearn.utils import shuffle - import shutil - import uuid - import os - import copy - import threading - import time - print('start generator') - #helper class - class tdreader(object): - def __init__(self,filelist,maxopen,tdclass): - - self.filelist=filelist - self.nfiles=len(filelist) - self.max=min(maxopen,len(filelist)) - self.tdlist=[] - self.tdopen=[] - self.tdclass=copy.deepcopy(tdclass) - self.tdclass.clear()#only use the format, no data - #self.copylock=thread.allocate_lock() - for i in range(self.nfiles): - self.tdlist.append(copy.deepcopy(tdclass)) - self.tdopen.append(False) - - self.closeAll() #reset state - self.shuffleseed=0 - - def start(self): - if self.max < 1: - raise ValueError('I got an invalid number of files to open (%d)' % self.max) - for i in range(self.max): - self.__readNext() - time.sleep(1) - - - - def __readNext(self): - #make sure this fast function has exited before getLast tries to read the file - import copy - readfilename=self.filelist[self.filecounter] - if len(filelist)>1: - self.tdlist[self.nextcounter].clear() - self.tdlist[self.nextcounter]=copy.deepcopy(self.tdclass) - self.tdlist[self.nextcounter].readthread=None - - def startRead(counter,filename,shuffleseed): - excounter=0 - while excounter<10: - try: - self.tdlist[counter].readIn_async(filename,ramdiskpath='/dev/shm/', - randomseed=shuffleseed) - break - except Exception as d: - print(self.filelist[counter]+' read error, retry...') - self.tdlist[counter].readIn_abort() - excounter=excounter+1 - if excounter<10: - time.sleep(5) - continue - traceback.print_exc(file=sys.stdout) - raise d - - t=threading.Thread(target=startRead, args=(self.nextcounter,readfilename,self.shuffleseed)) - t.start() - self.shuffleseed+=1 - if self.shuffleseed>1e5: - self.shuffleseed=0 - #startRead(self.nextcounter,readfilename,self.shuffleseed) - self.tdopen[self.nextcounter]=True - self.filecounter=self.__increment(self.filecounter,self.nfiles) - self.nextcounter=self.__increment(self.nextcounter,self.nfiles) - - - - def __getLast(self): - self.tdlist[self.lastcounter].readIn_join(wasasync=True,waitforStart=True) - td=self.tdlist[self.lastcounter] - #print('got ',self.lastcounter) - - self.tdopen[self.lastcounter]=False - self.lastcounter=self.__increment(self.lastcounter,self.nfiles) - return td - - def __increment(self,counter,maxval): - counter+=1 - if counter>=maxval: - counter=0 - return counter + - def __del__(self): - self.closeAll() - - def closeAll(self): - for i in range(len(self.tdopen)): - try: - if self.tdopen[i]: - self.tdlist[i].readIn_abort() - self.tdlist[i].clear() - self.tdopen[i]=False - except: pass - self.tdlist[i].removeRamDiskFile() - - self.nextcounter=0 - self.lastcounter=0 - self.filecounter=0 - - def get(self): - - td=self.__getLast() - self.__readNext() - return td - - td=(self.dataclass) - totalbatches=self.getNBatchesPerEpoch() - processedbatches=0 - - ####generate randoms by batch - batchgen=None - if hasattr(td,'generatePerBatch') and td.generatePerBatch: - ranges=td.generatePerBatch - batchgen=BatchRandomInputGenerator(ranges, self.__batchsize) - - xstored=[numpy.array([])] - dimx=0 - ystored=[] - dimy=0 - wstored=[] - dimw=0 - nextfiletoread=0 - - target_xlistlength=len(td.getInputShapes()) - 
- xout=[] - yout=[] - wout=[] - samplefilecounter=0 - - #prepare file list - filelist=[] - for s in self.samples: - filelist.append(self.getSamplePath(s)) - - TDReader=tdreader(filelist, self.maxFilesOpen, self.dataclass) - - #print('generator: total batches '+str(totalbatches)) - print('start file buffering...') - TDReader.start() - #### - # - # make block class for file read with get function that starts the next read automatically - # and closes all files in destructor? - # - # check if really the right ones are read.... - # - psamples=0 #for random shuffling - nepoch=0 - while 1: - if processedbatches == totalbatches: - processedbatches=0 - nepoch+=1 - - lastbatchrest=0 - if processedbatches == 0: #reset buffer and start new - #print('DataCollection: new turnaround') - xstored=[numpy.array([])] - dimx=0 - ystored=[] - dimy=0 - wstored=[] - dimw=0 - lastbatchrest=0 - - - else: - lastbatchrest=xstored[0].shape[0] - - batchcomplete=False - - - - if lastbatchrest >= self.__batchsize: - batchcomplete = True - - # if(xstored[1].ndim==1): - - while not batchcomplete: - import sys, traceback - try: - td=TDReader.get() - except: - traceback.print_exc(file=sys.stdout) - - if td.x[0].shape[0] == 0: - print('Found empty (corrupted?) file, skipping') - continue - - if xstored[0].shape[0] ==0: - #print('dc:read direct') #DEBUG - xstored=td.x - dimx=len(xstored) - ystored=td.y - dimy=len(ystored) - wstored=td.w - dimw=len(wstored) - if not self.useweights: - dimw=0 - xout=[] - yout=[] - wout=[] - for i in range(0,dimx): - xout.append([]) - for i in range(0,dimy): - yout.append([]) - for i in range(0,dimw): - wout.append([]) - - else: - - #randomly^2 shuffle - not needed every time - if psamples%2==0 and nepoch%2==1: - for i in range(0,dimx): - td.x[i]=shuffle(td.x[i], random_state=psamples) - for i in range(0,dimy): - td.y[i]=shuffle(td.y[i], random_state=psamples) - for i in range(0,dimw): - td.w[i]=shuffle(td.w[i], random_state=psamples) - - - - for i in range(0,dimx): - if(xstored[i].ndim==1): - xstored[i] = numpy.append(xstored[i],td.x[i]) - else: - xstored[i] = numpy.vstack((xstored[i],td.x[i])) - - for i in range(0,dimy): - if(ystored[i].ndim==1): - ystored[i] = numpy.append(ystored[i],td.y[i]) - else: - ystored[i] = numpy.vstack((ystored[i],td.y[i])) - - for i in range(0,dimw): - if(wstored[i].ndim==1): - wstored[i] = numpy.append(wstored[i],td.w[i]) - else: - wstored[i] = numpy.vstack((wstored[i],td.w[i])) - - if xstored[0].shape[0] >= self.__batchsize: - batchcomplete = True - - #limit of the random generator number - psamples+= td.x[0].shape[0] - if psamples > 4e8: - psamples/=1e6 - psamples=int(psamples) - - td.clear() - - - - if batchcomplete: - - #print('batch complete, split')#DEBUG - - for i in range(0,dimx): - splitted=numpy.split(xstored[i],[self.__batchsize]) - xstored[i] = splitted[1] - xout[i] = splitted[0] - for i in range(0,dimy): - splitted=numpy.split(ystored[i],[self.__batchsize]) - ystored[i] = splitted[1] - yout[i] = splitted[0] - for i in range(0,dimw): - splitted=numpy.split(wstored[i],[self.__batchsize]) - wstored[i] = splitted[1] - wout[i] = splitted[0] - - for i in range(0,dimx): - if(xout[i].ndim==1): - xout[i]=(xout[i].reshape(xout[i].shape[0],1)) - if not xout[i].shape[1] >0: - raise Exception('serious problem with the output shapes!!') - - for i in range(0,dimy): - if(yout[i].ndim==1): - yout[i]=(yout[i].reshape(yout[i].shape[0],1)) - if not yout[i].shape[1] >0: - raise Exception('serious problem with the output shapes!!') - - for i in range(0,dimw): - 
if(wout[i].ndim==1): - wout[i]=(wout[i].reshape(wout[i].shape[0],1)) - if not xout[i].shape[1] >0: - raise Exception('serious problem with the output shapes!!') - - processedbatches+=1 - - - if batchgen: - if len(xout)0).sum()) + average=average+entries + allist.append(entries) + + outdict={} + average=average/float((arr.shape[1])) + for i in range(len(allist)): + l=average/allist[i] + outdict[i]=l + self.classweights=outdict + + + def prependToSampleFiles(self, path_to_prepend): + newsamples=[] + for s in self.samples: + newsamples.append(path_to_prepend+s) + self.samples=newsamples + + def defineCustomPredictionLabels(self, labels): + self.dataclass.defineCustomPredictionLabels(labels) + + + def getCustomPredictionLabels(self): + if hasattr(self.dataclass, 'customlabels'): + return self.dataclass.customlabels + return None + + def getInputShapes(self): + ''' + gets the input shapes from the data class description + ''' + if len(self.samples)<1: + return [] + self.dataclass.filelock=None + td=copy.deepcopy(self.dataclass) + td.readIn(self.getSamplePath(self.samples[0]),shapesOnly=True) + shapes=td.getInputShapes() + td.clear() + return shapes + + def getTruthShape(self): + return self.dataclass.getTruthShapes() + + def getNRegressionTargets(self): + return (self.dataclass.getNRegressionTargets()) + + def getNClassificationTargets(self): + return (self.dataclass.getNClassificationTargets()) + + def getUsedTruth(self): + return self.dataclass.getUsedTruth() + + def setBatchSize(self,bsize): + if bsize > self.nsamples: + raise Exception('Batch size must not be bigger than total sample size') + self.__batchsize=bsize + + def getBatchSize(self): + return self.__batchsize + + @property + def batch_size(self): + return self.__batchsize + + def getSamplesPerEpoch(self): + #modify by batch split + count=self.getNBatchesPerEpoch() + if count != 1: + return count*self.__batchsize #final + else: + return self.nsamples + + def getAvEntriesPerFile(self): + return float(self.nsamples)/float(len(self.samples)) + + + def getNBatchesPerEpoch(self): + if self.__batchsize <= 1: + return 1 + count=0 + while (count+1)*self.__batchsize <= self.nsamples: + count+=1 + return count + + def validate(self, remove=True, skip_first=0): + ''' + checks if all samples in the collection can be read properly. + removes the invalid samples from the sample list. 
+ Also removes the original link to the root file, so recover cannot be run + (this might be changed in future implementations) + ''' + for i in range(len(self.samples)): + if i < skip_first: continue + if i >= len(self.samples): break + td=copy.deepcopy(self.dataclass) + fullpath=self.getSamplePath(self.samples[i]) + print('reading '+fullpath, str(self.sampleentries[i]), str(i), '/', str(len(self.samples))) + try: + td.readIn(fullpath) + for x in td.x: + if td.nsamples != x.shape[0]: + print("not right length") + raise Exception("not right length") + for y in td.y: + if td.nsamples != y.shape[0]: + print("not right length") + raise Exception("not right length") + + del td + continue + except Exception as e: + print('problem with file, removing ', fullpath) + del self.samples[i] + del self.originRoots[i] + self.nsamples -= self.sampleentries[i] + del self.sampleentries[i] + + def removeEntry(self,relative_path_to_entry): + for i in range(len(self.samples)): + if relative_path_to_entry==self.samples[i]: + print('removing '+self.samples[i]+" - "+str(self.sampleentries[i])) + del self.samples[i] + del self.originRoots[i] + self.nsamples -= self.sampleentries[i] + del self.sampleentries[i] + break + + + def writeToFile(self,filename): + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as fd: + self.dataclass.clear() + pickle.dump(self.samples, fd,protocol=0 ) + pickle.dump(self.sampleentries, fd,protocol=0 ) + pickle.dump(self.originRoots, fd,protocol=0 ) + pickle.dump(self.nsamples, fd,protocol=0 ) + pickle.dump(self.useweights, fd,protocol=0 ) + pickle.dump(self.__batchsize, fd,protocol=0 ) + pickle.dump(self.dataclass, fd,protocol=0 ) + pickle.dump(self.weighter, fd,protocol=0 ) + #pickle.dump(self.means, fd,protocol=0 ) + self.means.dump(fd) + + shutil.move(fd.name, filename) + + ## for conversion essential!!! 
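+    ## (readRawFromFile below restores only the sample bookkeeping - samples, entries and origin roots - without unpickling the data class)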
+ def readRawFromFile(self,filename): + #no assumption on data class + fd=open(filename,'rb') + self.samples=pickle.load(fd) + self.sampleentries=pickle.load(fd) + self.originRoots=pickle.load(fd) + fd.close() + + def readFromFile(self,filename): + fd=open(filename,'rb') + self.samples=pickle.load(fd) + self.sampleentries=pickle.load(fd) + self.originRoots=pickle.load(fd) + self.nsamples=pickle.load(fd) + self.useweights=pickle.load(fd) + self.__batchsize=pickle.load(fd) + self.dataclass=pickle.load(fd) + self.weighter=pickle.load(fd) + self.means=pickle.load(fd) + fd.close() + + self.dataDir=os.path.dirname(os.path.abspath(filename)) + self.dataDir+='/' + #don't check if files exist + return + for f in self.originRoots: + if not f.endswith(".root"): continue + if not os.path.isfile(f): + print('not found: '+f) + raise Exception('original root file not found') + for f in self.samples: + fpath=self.getSamplePath(f) + if not os.path.isfile(fpath): + print('not found: '+fpath) + raise Exception('sample file not found') + + + def readRootListFromFile(self, file, relpath=''): + self.samples=[] + self.sampleentries=[] + self.originRoots=[] + self.nsamples=0 + self.dataDir="" + + fdir=os.path.dirname(file) + fdir=os.path.abspath(fdir) + fdir=os.path.realpath(fdir) + lines = [line.rstrip('\n') for line in open(file)] + for line in lines: + if len(line) < 1: continue + if relpath: + self.originRoots.append(os.path.join(relpath, line)) + else: + self.originRoots.append(line) + + if len(self.originRoots)<1: + raise Exception('root samples list empty') + + + def split(self,ratio): + ''' + ratio is self/(out+self) + returns out + modifies itself + ''' + + out=DataCollection() + itself=copy.deepcopy(self) + + nsamplefiles=len(self.samples) + + out.samples=[] + out.sampleentries=[] + out.originRoots=[] + out.nsamples=0 + out.__batchsize=copy.deepcopy(self.__batchsize) + out.isTrain=copy.deepcopy(self.isTrain) + out.dataDir=self.dataDir + + out.dataclass=copy.deepcopy(self.dataclass) + out.weighter=self.weighter #ref oks + out.means=self.means + out.useweights=self.useweights + + + itself.samples=[] + itself.sampleentries=[] + itself.originRoots=[] + itself.nsamples=0 + + + + if nsamplefiles < 2: + out=copy.deepcopy(self) + print('DataCollection.split: warning: only one file, split will just return a copy of this') + return out + + for i in range(0, nsamplefiles): + frac=(float(i))/(float(nsamplefiles)) + if frac < ratio and i < nsamplefiles-1: + itself.samples.append(self.samples[i]) + itself.sampleentries.append(self.sampleentries[i]) + itself.originRoots.append(self.originRoots[i]) + itself.nsamples+=self.sampleentries[i] + else: + out.samples.append(self.samples[i]) + out.sampleentries.append(self.sampleentries[i]) + out.originRoots.append(self.originRoots[i]) + out.nsamples+=self.sampleentries[i] + + + + self.samples=itself.samples + self.sampleentries=itself.sampleentries + self.originRoots=itself.originRoots + self.nsamples=itself.nsamples + + return out + + + def createTestDataForDataCollection(self, collectionfile, inputfile, outputDir, + outname='dataCollection.dc', + traind=None, + relpath=''): + + self.readFromFile(collectionfile) + self.dataclass.remove=False + self.dataclass.weight=True #False + if traind: + print('[createTestDataForDataCollection] dataclass is overriden by user request') + self.dataclass=traind + self.readRootListFromFile(inputfile, relpath=relpath) + self.createDataFromRoot( + self.dataclass, outputDir, False, + dir_check = not self.batch_mode + ) + 
self.writeToFile(outputDir+'/'+outname) + + + def recoverCreateDataFromRootFromSnapshot(self, snapshotfile): + snapshotfile=os.path.abspath(snapshotfile) + self.readFromFile(snapshotfile) + td=self.dataclass + #For emergency recover td.reducedtruthclasses=['isB','isC','isUDSG'] + if len(self.originRoots) < 1: + return + #if not self.means: + # self.means=td.produceMeansFromRootFile(self.originRoots[0]) + outputDir=os.path.dirname(snapshotfile)+'/' + self.dataDir=outputDir + finishedsamples=len(self.samples) + + self.__writeData_async_andCollect(finishedsamples,outputDir) + self.writeToFile(outputDir+'/dataCollection.dc') + + + def createDataFromRoot( + self, dataclass, outputDir, + redo_meansandweights=True, means_only=False, dir_check=True + ): + ''' + Also creates a file list of the output files + After the operation, the object will point to the already processed + files (not root files) + Writes out a snapshot of itself after every successfully written output file + to recover the data until a possible error occurred + ''' + + if len(self.originRoots) < 1: + print('createDataFromRoot: no input root file') + raise Exception('createDataFromRoot: no input root file') + + outputDir+='/' + if os.path.isdir(outputDir) and dir_check: + raise Exception('output dir must not exist') + elif not os.path.isdir(outputDir): + os.mkdir(outputDir) + self.dataDir=outputDir + self.nsamples=0 + self.samples=[] + self.sampleentries=[] + self.dataclass=copy.deepcopy(dataclass) + td=self.dataclass + ##produce weighter from a larger dataset as one file + + + if redo_meansandweights and (td.remove or td.weight): + logging.info('producing weights and remove indices') + self.weighter = td.produceBinWeighter( + self.originRoots + ) + self.weighter.printHistos(outputDir) + + + if redo_meansandweights: + logging.info('producing means and norms') + self.means = td.produceMeansFromRootFile( + self.originRoots, limit=self.meansnormslimit + ) + + if means_only: return + + if self.batch_mode: + for sample in self.originRoots: + self.__writeData(sample, outputDir) + else: + self.__writeData_async_andCollect(0, outputDir) + + def __writeData(self, sample, outputDir): + sw=stopwatch() + td=copy.deepcopy(self.dataclass) + + fileTimeOut(sample,120) #once available copy to ram + + if self.batch_mode or self.no_copy_on_convert: + tmpinput = sample + + def removefile(): + pass + else: + tmpinput = '/dev/shm/'+str(os.getpid())+os.path.basename(sample) + + def removefile(): + os.system('rm -f '+tmpinput) + + import atexit + atexit.register(removefile) + + os_ret = os.system('cp '+sample+' '+tmpinput) + if os_ret: + raise Exception("copy to ramdisk not successful for "+sample) + + try: + td.readFromRootFile(tmpinput, self.means, self.weighter) + sbasename = os.path.basename(sample) + newname = sbasename[:sbasename.rfind('.')] + + if usenewformat: + newname+='.meta' + else: + newname+='.z' + newpath=os.path.abspath(outputDir+newname) + td.writeOut(newpath) + print('converted and written '+newname+' in ',sw.getAndReset(),' sec') + self.samples.append(newname) + self.nsamples+=td.nsamples + self.sampleentries.append(td.nsamples) + td.clear() + + if not self.batch_mode: + self.writeToFile(outputDir+'/snapshot.dc') + + finally: + removefile() + + def __writeData_async_andCollect(self, startindex, outputDir): + + #set tree name to use + logger.info('setTreeName') + import DeepJetCore.preprocessing + DeepJetCore.preprocessing.setTreeName(self.dataclass.treename) + + from multiprocessing import Process, Queue, cpu_count, Lock + wo_queue = 
Queue() + writelock=Lock() + thispid=str(os.getpid()) + if not self.batch_mode and not os.path.isfile(outputDir+'/snapshot.dc'): + self.writeToFile(outputDir+'/snapshot.dc') + + tempstoragepath='/dev/shm/'+thispid + + logger.info('creating dir '+tempstoragepath) + os.system('mkdir -p '+tempstoragepath) + + def writeData_async(index,woq,wrlck): + + logger.info('async started') + + sw=stopwatch() + td=copy.deepcopy(self.dataclass) + sample=self.originRoots[index] + + if self.batch_mode or self.no_copy_on_convert: + tmpinput = sample + + def removefile(): + pass + else: + tmpinput = tempstoragepath+'/'+str(os.getpid())+os.path.basename(sample) + + def removefile(): + os.system('rm -f '+tmpinput) + + import atexit + atexit.register(removefile) + + logger.info('start cp') + os_ret=os.system('cp '+sample+' '+tmpinput) + if os_ret: + raise Exception("copy to ramdisk not successful for "+sample) + + success=False + out_samplename='' + out_sampleentries=0 + sbasename = os.path.basename(sample) + newname = sbasename[:sbasename.rfind('.')] + if usenewformat: + newname+='.meta' + else: + newname+='.z' + newpath=os.path.abspath(outputDir+newname) + + try: + logger.info('readFromRootFile') + td.readFromRootFile(tmpinput, self.means, self.weighter) + logger.info('writeOut') + #wrlck.acquire() + td.writeOut(newpath) + #wrlck.release() + print('converted and written '+newname+' in ',sw.getAndReset(),' sec -', index) + + out_samplename=newname + out_sampleentries=td.nsamples + success=True + td.clear() + removefile() + woq.put((index,[success,out_samplename,out_sampleentries])) + + except: + print('problem in '+newname) + removefile() + woq.put((index,[False,out_samplename,out_sampleentries])) + raise + + def __collectWriteInfo(successful,samplename,sampleentries,outputDir): + if not successful: + raise Exception("write not successful, stopping") + + self.samples.append(samplename) + self.nsamples+=sampleentries + self.sampleentries.append(sampleentries) + if not self.batch_mode: + self.writeToFile(outputDir+'/snapshot_tmp.dc')#avoid to overwrite directly + os.system('mv '+outputDir+'/snapshot_tmp.dc '+outputDir+'/snapshot.dc') + + processes=[] + processrunning=[] + processfinished=[] + for i in range(startindex,len(self.originRoots)): + processes.append(Process(target=writeData_async, args=(i,wo_queue,writelock) ) ) + processrunning.append(False) + processfinished.append(False) + + nchilds = int(cpu_count()/2)-2 if self.nprocs <= 0 else self.nprocs + #if 'nvidiagtx1080' in os.getenv('HOSTNAME'): + # nchilds=cpu_count()-5 + if nchilds<1: + nchilds=1 + + #nchilds=10 + + + + lastindex=startindex-1 + alldone=False + results=[] + + try: + while not alldone: + nrunning=0 + for runs in processrunning: + if runs: nrunning+=1 + + for i in range(len(processes)): + if nrunning>=nchilds: + break + if processrunning[i]:continue + if processfinished[i]:continue + time.sleep(0.1) + logging.info('starting %s...' % self.originRoots[startindex+i]) + processes[i].start() + processrunning[i]=True + nrunning+=1 + + + + if not wo_queue.empty(): + res=wo_queue.get() + results.append(res) + originrootindex=res[0] + logging.info('finished %s...' 
% self.originRoots[originrootindex]) + processfinished[originrootindex-startindex]=True + processes [originrootindex-startindex].join(5) + processrunning [originrootindex-startindex]=False + #immediately send the next + continue + + results = sorted(results, key=lambda x:x[0]) + for r in results: + thisidx=r[0] + if thisidx==lastindex+1: + logging.info('>>>> collected result %d of %d' % (thisidx+1,len(self.originRoots))) + __collectWriteInfo(r[1][0],r[1][1],r[1][2],outputDir) + lastindex=thisidx + + if nrunning==0: + alldone=True + continue + time.sleep(0.1) + + except: + os.system('rm -rf '+tempstoragepath) + raise + os.system('rm -rf '+tempstoragepath) + + def convertListOfRootFiles(self, inputfile, dataclass, outputDir, + takemeansfrom='', means_only=False, + output_name='dataCollection.dc', + relpath=''): + + newmeans=True + if takemeansfrom: + self.readFromFile(takemeansfrom) + newmeans=False + self.readRootListFromFile(inputfile, relpath=relpath) + self.createDataFromRoot( + dataclass, outputDir, + newmeans, means_only = means_only, + dir_check= not self.batch_mode + ) + self.writeToFile(outputDir+'/'+output_name) + + def getAllLabels(self): + return self.__stackData(self.dataclass,'y') + + def getAllFeatures(self): + return self.__stackData(self.dataclass,'x') + + def getAllWeights(self): + return self.__stackData(self.dataclass,'w') + + + def getSamplePath(self,samplefile): + #for backward compatibility + if samplefile[0] == '/': + return samplefile + return self.dataDir+'/'+samplefile + + def __stackData(self, dataclass, selector): + td=dataclass + out=[] + firstcall=True + for sample in self.samples: + td.readIn(self.getSamplePath(sample)) + #make this generic + thislist=[] + if selector == 'x': + thislist=td.x + if selector == 'y': + thislist=td.y + if selector == 'w': + thislist=td.w + + if firstcall: + out=thislist + firstcall=False + else: + for i in range(0,len(thislist)): + if selector == 'w': + out[i] = np.append(out[i],thislist[i]) + else: + out[i] = np.vstack((out[i],thislist[i])) + + return out + + + def replaceTruthForGAN(self, generated_array, original_truth): + return self.dataclass.replaceTruthForGAN(generated_array, original_truth) + + def generator(self): + from sklearn.utils import shuffle + import uuid + import threading + print('start generator') + #helper class + class tdreader(object): + def __init__(self,filelist,maxopen,tdclass): + + self.filelist=filelist + self.nfiles=len(filelist) + + self.tdlist=[] + self.tdopen=[] + self.tdclass=copy.deepcopy(tdclass) + self.tdclass.clear()#only use the format, no data + #self.copylock=thread.allocate_lock() + for i in range(self.nfiles): + self.tdlist.append(copy.deepcopy(tdclass)) + self.tdopen.append(False) + + self.closeAll() #reset state + self.shuffleseed=0 + + def start(self): + self.__readNext() + + def __readNext(self): + #make sure this fast function has exited before getLast tries to read the file + readfilename=self.filelist[self.filecounter] + if len(filelist)>1: + self.tdlist[self.nextcounter].clear() + self.tdlist[self.nextcounter]=copy.deepcopy(self.tdclass) + self.tdlist[self.nextcounter].readthread=None + + def startRead(counter,filename,shuffleseed): + excounter=0 + while excounter<10: + try: + self.tdlist[counter].readIn_async(filename,ramdiskpath='/dev/shm/', + randomseed=shuffleseed) + break + except Exception as d: + print(self.filelist[counter]+' read error, retry...') + self.tdlist[counter].readIn_abort() + excounter=excounter+1 + if excounter<10: + time.sleep(5) + continue + 
traceback.print_exc(file=sys.stdout) + raise d + + # don't remove these commented lines just yet + # the whole generator call is moved to thread since keras 2.0.6 anyway + #t=threading.Thread(target=startRead, args=(self.nextcounter,readfilename,self.shuffleseed)) + #t.start() + startRead(self.nextcounter,readfilename,self.shuffleseed) + self.shuffleseed+=1 + if self.shuffleseed>1e5: + self.shuffleseed=0 + #startRead(self.nextcounter,readfilename,self.shuffleseed) + self.tdopen[self.nextcounter]=True + self.filecounter=self.__increment(self.filecounter,self.nfiles,to_shuffle=True) + self.nextcounter=self.__increment(self.nextcounter,self.nfiles) + + + + def __getLast(self): + #print('joining...') #DEBUG PERF + self.tdlist[self.lastcounter].readIn_join(wasasync=True,waitforStart=True) + #print('joined') #DEBUG PERF + td=self.tdlist[self.lastcounter] + #print('got ',self.lastcounter) + + self.tdopen[self.lastcounter]=False + self.lastcounter=self.__increment(self.lastcounter,self.nfiles) + return td + + def __increment(self,counter,maxval,to_shuffle=False): + counter+=1 + if counter>=maxval: + counter=0 + if to_shuffle: + self.filelist = shuffle(self.filelist) + return counter + + def __del__(self): + self.closeAll() + + def closeAll(self): + for i in range(len(self.tdopen)): + try: + if self.tdopen[i]: + self.tdlist[i].readIn_abort() + self.tdlist[i].clear() + self.tdopen[i]=False + except: pass + self.tdlist[i].removeRamDiskFile() + + self.nextcounter=0 + self.lastcounter=0 + self.filecounter=0 + + def get(self): + + td=self.__getLast() + self.__readNext() + return td + + + td=(self.dataclass) + totalbatches=self.getNBatchesPerEpoch() + processedbatches=0 + + ####generate randoms by batch + batchgen=None + if hasattr(td,'generatePerBatch') and td.generatePerBatch: + ranges=td.generatePerBatch + batchgen=BatchRandomInputGenerator(ranges, self.__batchsize) + + xstored=[np.array([])] + dimx=0 + ystored=[] + dimy=0 + wstored=[] + dimw=0 + nextfiletoread=0 + + target_xlistlength=len(td.getInputShapes()) + + xout=[] + yout=[] + wout=[] + samplefilecounter=0 + + #prepare file list + filelist=[] + for s in self.samples: + filelist.append(self.getSamplePath(s)) + + TDReader=tdreader(filelist, self.maxFilesOpen, self.dataclass) + + #print('generator: total batches '+str(totalbatches)) + print('start file buffering...') + TDReader.start() + #### + # + # make block class for file read with get function that starts the next read automatically + # and closes all files in destructor? + # + # check if really the right ones are read.... + # + psamples=0 #for random shuffling + nepoch=0 + shufflecounter=0 + shufflecounter2=0 + while 1: + if processedbatches == totalbatches: + processedbatches=0 + nepoch+=1 + shufflecounter2+=1 + + lastbatchrest=0 + if processedbatches == 0: #reset buffer and start new + #print('DataCollection: new turnaround') + xstored=[np.array([])] + dimx=0 + ystored=[] + dimy=0 + wstored=[] + dimw=0 + lastbatchrest=0 + + + else: + lastbatchrest=xstored[0].shape[0] + + batchcomplete=False + + if lastbatchrest >= self.__batchsize: + batchcomplete = True + + # if(xstored[1].ndim==1): + + while not batchcomplete: + import sys, traceback + try: + td=TDReader.get() + except: + traceback.print_exc(file=sys.stdout) + + if td.x[0].shape[0] == 0: + print('Found empty (corrupted?) 
file, skipping') + continue + + if xstored[0].shape[0] ==0: + #print('dc:read direct') #DEBUG + xstored=td.x + dimx=len(xstored) + ystored=td.y + dimy=len(ystored) + wstored=td.w + dimw=len(wstored) + if not self.useweights: + dimw=0 + xout=[] + yout=[] + wout=[] + for i in range(0,dimx): + xout.append([]) + for i in range(0,dimy): + yout.append([]) + for i in range(0,dimw): + wout.append([]) + + else: + + #randomly^2 shuffle - not needed every time + if shufflecounter>1 and shufflecounter2>1: + shufflecounter=0 + shufflecounter2=0 + for i in range(0,dimx): + td.x[i]=shuffle(td.x[i], random_state=psamples) + for i in range(0,dimy): + td.y[i]=shuffle(td.y[i], random_state=psamples) + for i in range(0,dimw): + td.w[i]=shuffle(td.w[i], random_state=psamples) + + shufflecounter+=1 + + for i in range(0,dimx): + if(xstored[i].ndim==1): + xstored[i] = np.append(xstored[i],td.x[i]) + else: + xstored[i] = np.vstack((xstored[i],td.x[i])) + + for i in range(0,dimy): + if(ystored[i].ndim==1): + ystored[i] = np.append(ystored[i],td.y[i]) + else: + ystored[i] = np.vstack((ystored[i],td.y[i])) + + for i in range(0,dimw): + if(wstored[i].ndim==1): + wstored[i] = np.append(wstored[i],td.w[i]) + else: + wstored[i] = np.vstack((wstored[i],td.w[i])) + + if xstored[0].shape[0] >= self.__batchsize: + batchcomplete = True + + #limit of the random generator number + psamples+= td.x[0].shape[0] + if psamples > 4e8: + psamples/=1e6 + psamples=int(psamples) + + td.clear() + + if batchcomplete: + + #print('batch complete, split')#DEBUG + + for i in range(0,dimx): + splitted=np.split(xstored[i],[self.__batchsize]) + xstored[i] = splitted[1] + xout[i] = splitted[0] + for i in range(0,dimy): + splitted=np.split(ystored[i],[self.__batchsize]) + ystored[i] = splitted[1] + yout[i] = splitted[0] + for i in range(0,dimw): + splitted=np.split(wstored[i],[self.__batchsize]) + wstored[i] = splitted[1] + wout[i] = splitted[0] + + for i in range(0,dimx): + if(xout[i].ndim==1): + xout[i]=(xout[i].reshape(xout[i].shape[0],1)) + if not xout[i].shape[1] >0: + raise Exception('serious problem with the output shapes!!') + + for i in range(0,dimy): + if(yout[i].ndim==1): + yout[i]=(yout[i].reshape(yout[i].shape[0],1)) + if not yout[i].shape[1] >0: + raise Exception('serious problem with the output shapes!!') + + for i in range(0,dimw): + if(wout[i].ndim==1): + wout[i]=(wout[i].reshape(wout[i].shape[0],1)) + if not xout[i].shape[1] >0: + raise Exception('serious problem with the output shapes!!') + + processedbatches+=1 + + + if batchgen: + if len(xout) -wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -bash Miniconda3-latest-Linux-x86_64.sh +#!/bin/bash + +gpuopt="" +files=$(ls -l /dev/nvidia* 2> /dev/null | egrep -c '\n') +if [[ "$files" != "0" ]] +then +gpuopt="--nv" +fi + +#this is a singularity problem only fixed recently +unset LD_LIBRARY_PATH +unset PYTHONPATH +sing=`which singularity` +unset PATH +cd + +$sing run -B /eos -B /afs $gpuopt /cvmfs/unpacked.cern.ch/registry.hub.docker.com/cernml4reco/deepjetcore3:latest ``` -Please follow the installation process. If you don't know what an option does, please answer 'yes'. -After installation, you have to log out and log in again for changes to take effect. -If you don't use bash, you might have to add the conda path to your .rc file + +The cache dir can get rather large and is normally located at ~/.singularity/cache. To avoid filling up the home afs, the cache can be set to /tmp or the work afs. 
Once the container is fully closed, the cache can be safely deleted. Singularity reacts to environment variables, e.g. + ``` -export PATH="/miniconda3/bin:$PATH" +export SINGULARITY_CACHEDIR="/tmp/$(whoami)/singularity" ``` -This has to be only done once. +Sometimes you need to try a few times - singularity is a bit weird. But once the container is launched, everything works smoothly. +The message about a missing user group can be safely ignored. -Installation: +**It is important** that your bashrc does not change or reset the ``LD_LIBRARY_PATH`` or ``PYTHONPATH`` environment variables. Also **remove any anaconda paths from your bashrc**, because they will reset ``LD_LIBRARY_PATH`` and ``PYTHONPATH``. The system needs to be in a clean environment state within the container (as it should be). -``` -mkdir -cd -git clone https://github.com/DL4Jets/DeepJetCore -cd DeepJetCore/environment -./setupEnv.sh deepjetLinux3.conda -``` -For enabling gpu support add 'gpu' as an additional option to the last command. -This will take a while. Please log out and in again once the installation is finised. -Compiling DeepJetCore -=========== +**Building / using the container** +For users without access to cvmfs, the container can be built manually or pulled from dockerhub. For every release there is a container on dockerhub; the latest release is tagged. The ``latest`` tag refers to the developing master branch. +``cernml4reco/deepjetcore3:latest`` +It can be pulled with docker or with singularity. For details, please see the corresponding docker or singularity documentation. +To build the container, the files can be found in the ``docker`` subdirectory. It is a two-stage process: first, the base container ``Dockerfile_base`` needs to be built, containing the basic system packages. This will take a while, because root is compiled within it. The output container name must be ``cernml4reco/djcbase:cu11.1``. In the next step, the actual DeepJetCore container is built on top. **Please make sure to use a release, not the developing master branch!** -When the installation was successful, the DeepJetCore tools need to be compiled. + +**Building with conda** +If neither singularity nor docker is available, it is possible to build the environment using conda. This _should_ work anywhere, since conda +pulls tensorflow's CUDA dependencies. ``` -git clone https://github.com/DL4Jets/DeepJetCore.git cd DeepJetCore -source lxplus_env.sh / gpu_env.sh -cd compiled -make -j4 +# This will take some time (includes ROOT, tensorflow-gpu, CUDA...) +conda env create -f conda_env.yml # if that fails use conda_env_exact.yml with entirely fixed versions +conda activate DJC2 +# Build compiled modules with a modified Makefile +source docker_env.sh # sets up paths, never mind the name +cd compiled +make -f Makefile_conda -j4 # conda needs slightly different lib names ``` -After successfully compiling the tools, log out and in again. -The environment is set up. - Usage ========== -For a practical example application of the DeepJetCore package, please refer to https://github.com/DL4Jets/DeepJet +DeepJetCore is only a set of tools and wrappers and does not provide ready-to-use training code. +However, an example package containing more specific code examples (referred to as 'subpackage' in the following) can be created once the container is launched using the script ``createSubpackage.py``. +This subpackage includes an example dataset which gets generated on the fly (size about 150 MB) using the ``--data`` option.
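+As a rough sketch (the subpackage name ``mysubpackage`` is only a placeholder; check ``createSubpackage.py -h`` for the exact options), creating and entering a subpackage typically looks like:
+```
+# inside the running container
+createSubpackage.py --data mysubpackage
+cd mysubpackage
+source env.sh
+```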
+**More instructions are printed by the script creating the subpackage and serve as documentation for a simple training.** +This subpackage can serve as a reference for your own projects. +In general, the following steps are needed for a training and evaluation (a condensed command sketch is given at the end of this section): + + * Always source the environment script (``env.sh``) in the subpackage directory, NOT in DeepJetCore. + * Define the training data structure, i.e. which branches from the input root ntuples are read, and how they are re-organised as input to the DNN. An example can be found in subpackage/modules/datastructures. + * Convert the root ntuples into the training data format using convertFromSource.py. Please consider the help message for options (``convertFromSource.py -h``). The input is a list of root files contained in a text file. An example dataset is generated when setting up the subpackage. It can be found in subpackage/example_data. + Different files should be used for training and testing. To convert the training data, execute ``convertFromSource.py -i <input file list> -o <output dir> -c TrainData_example``, with TrainData_example being the data structure defined before. The test data will be read directly from the source files (see below). + * Train the model. The convertFromSource.py script creates a set of output files and a descriptor (dataCollection.djcdc). This descriptor is fed to the training file which contains the model definition and an instance of the training_base class. An example is given in subpackage/Train/training_example.py. + This file is called with ``python3 training_example.py /path/to/data/dataCollection.djcdc <output dir>``. More options are provided and described when calling ``python3 training_example.py -h``. + * Once the training is done, the model can be used to predict the output for the test data: ``predict.py /path/to/the/model.h5 /path/to/the/training/dataCollection.djcdc <test file list> <output dir>``. Please keep in mind that the output can become large (not the case in the example). + * For plotting, there are a few simple wrappers provided, which can be found in DeepJetCore/evaluation/evaluation.py, for making ROCs and simple plots. As input, these functions take the text file created by the predict.py script. + + +The general pipeline for training is depicted in the following sketch: +![pipeline](https://github.com/DL4Jets/DeepJetCore/blob/master/training_pipeline.png "Data pipeline for training") + + +The general pipeline for inference/prediction is depicted in the following sketch: +![pipeline](https://github.com/DL4Jets/DeepJetCore/blob/master/predict_pipeline.png "Data pipeline for prediction") + +More information on the three functions of TrainData that need to be defined by the user (in addition to the training script) is given in the next section. +For the training script, please refer to the example provided with ``createSubpackage.py``.
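+The condensed command sketch mentioned above is given here (directory names are placeholders; the file names follow the example subpackage, so adapt them to your own project):
+```
+# convert the training ntuples listed in a text file into the DeepJetCore format
+convertFromSource.py -i example_data/train_files.txt -o dctrain -c TrainData_example
+# train the example model; the last argument is the training output directory
+python3 Train/training_example.py dctrain/dataCollection.djcdc training_out
+# predict on the test files with the trained model
+predict.py training_out/KERAS_model.h5 training_out/trainsamples.djcdc example_data/test_files.txt out_predict
+```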
+TrainData definition and notes on upgrading (from 1.X to 2.X/3.X) +========================= + +There have been substantial format changes from 1.X to 2.X, including low-level support preparations for ragged tensors. Therefore, all data from 1.X needs to be converted or newly created. Also, the interface changed slightly. + +The master branch has also been switched to tensorflow 2.0 recently. This might require adapting subpackages. A workaround for the moment for old subpackages would be to include the following code in the subpackage ``__init__.py``: + +``` +import sys +import tensorflow +tensorflow.compat.v1.disable_eager_execution() +sys.modules["tensorflow"]=tensorflow.compat.v1 +``` + +The TrainData class has been slimmed significantly. Now, the ``__init__`` function does not need any additional arguments anymore, and there are no mandatory definitions. Only the following functions should be defined for the interface (all others are deprecated): + + * ``createWeighterObjects(self, allsourcefiles)``: is not mandatory. It can be used, however, to create a dictionary of (picklable) objects that depend on the whole dataset (e.g. numbers for normalisation). **Returns**: a dictionary of weighter objects + * ``convertFromSourceFile(self, filename, weighterobjects, istraining, **kwargs)``: is mandatory. This function defines a rule to convert one source file to one output file. The final output should be a list of numpy feature arrays, a list of numpy truth arrays, and a list of numpy weight arrays. The latter can also be empty. The conversion can be done from root e.g. with uproot or similar, but can also use any other input format. **Returns** three items: a list of feature arrays, a list of truth arrays, a list of weight arrays + * ``writeOutPrediction(self, predicted, features, truth, weights, outfilename, inputfile)``: is mandatory. Defines how the output of the network should be written back to an output format. This could e.g. be a root tree, which can be a friend to the original tree, or any other output. The function gives optional access to all input features, truth, weights (if any), and the input source file name. **Returns**: nothing + +Of course any user function, member etc. beyond that can be defined, too. + +For everybody who used the built-in weighting functionality, this functionality now needs to be reimplemented in the user code (in ``createWeighterObjects``). The old implementation can be found here: https://github.com/DL4Jets/DeepJetCore/blob/1.X/TrainData.py#L657-L672 and can be used in the same way in the user code. + +Ragged Tensors +===================== + +Ragged data structures are supported to some extent, with workarounds for places where either keras or tensorflow are still missing support. +Only tensors ragged in the first dimension are supported so far, meaning, e.g., a different number of inputs per event, but each input having the same feature length (or more dimensions with fixed sizes). This should cover most use cases already. +To create a ragged data structure, the function ``convertFromSourceFile(self, filename, weighterobjects, istraining, **kwargs)`` must not return a list of numpy arrays, but a list of ``DeepJetCore.SimpleArray``, which is a DeepJetCore class supporting ragged structures. This array can be constructed from two numpy arrays: one describing the row splits (as int, in the same format as tensorflow ragged array row splits) and one containing the data, where the 0th and 1st dimensions are flattened. Then the array can be filled by invoking ``createFromNumpy(data, row_splits)``. +For training, the model will receive a list of tensors, where the first one will be the data and the second the row splits. The same applies to the truth. This circumvents incomplete keras support for the moment. + + +Using the data pipeline directly +===================== + +The data pipeline can also be used without the training_base class surrounding it.
The DataCollection can be opened and a generator function can be invoked that returns the data batch-by-batch. An example is given here: + +``` +from DeepJetCore.DataCollection import DataCollection +train_data = DataCollection("path/to/your/dataCollection.dc") + +# splits off 10% of the training dataset for validation. Can be used in the same way as train_data +val_data=train_data.split(0.9) + +# Set the batch size. +# If the data is ragged in dimension 1 (see convert options), +# then this is the maximum number of elements per batch, which could be distributed differently +# to individual examples. E.g., if the first example has 50 elements, the second 48, and the third 30, +# and the batch size is set to 100, it would return the first two examples (in total 99 elements) in +# the first batch etc. This is helpful to avoid out-of-memory errors during training + +train_data.setBatchSize(100) + +# prepare the generator + +gen = train_data.invokeGenerator() + +# loop over epochs here ... + +gen.shuffleFileList() +gen.prepareNextEpoch() + +# this number can differ from epoch to epoch for ragged data! +nbatches = gen.getNBatches() +generator=gen.feedNumpyData() + +for b in range(nbatches): + + #should not happen unless files are broken (will give additional errors) + if gen.isEmpty(): + raise Exception("ran out of data") + + # weights are optional, each of these is a list of numpy arrays + features_list, truth_list, weight_list = next(generator) + + # do your training + + +# end epoch loop + +``` + + +For developers +===================== + +For developing the best strategy is to create a container or just run it, and then redirect the paths etc to the development directory. This can be done conveniently by sourcing the file ``docker_env.sh``. After sourcing the environment, the package needs to be compiled by running ``make -j5`` in the ``compiled`` directory. 
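+For instance (a rough sketch; the checkout path is a placeholder):
+```
+# inside the running container
+cd /path/to/your/DeepJetCore     # development checkout
+source docker_env.sh             # points PATH, PYTHONPATH and LD_LIBRARY_PATH to this checkout
+cd compiled && make -j5          # rebuild the compiled modules
+```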
+ + + + diff --git a/SimpleArray.py b/SimpleArray.py new file mode 100644 index 0000000..dc63739 --- /dev/null +++ b/SimpleArray.py @@ -0,0 +1,110 @@ + +from DeepJetCore.compiled.c_simpleArray import simpleArrayF, simpleArrayI +import numpy as np + +class SimpleArray(object): + + def __init__(self, nparr=None, nprs=np.array([],dtype='int64'), dtype='float32', name=""): + + assert nparr is not None or dtype is not None + self.dtype=None + if nparr is not None: + dtype = str(nparr.dtype) + self._setDtype(dtype) + if nparr is not None: + self.createFromNumpy(nparr, nprs) + self.setName(name) + + def __eq__(self,other): + if self.sa.dtypeI() != other.sa.dtypeI(): + return False + return self.sa == other.sa + + def _setDtype(self,dtype): + assert dtype=='float32' or dtype=='int32' + if dtype=='float32': + self.sa = simpleArrayF() + elif dtype=='int32': + self.sa = simpleArrayI() + self.dtype = dtype + + #now pass through all the other member functions transparently + + def set(self,*args): + self.sa.set(*args) + + def setName(self, namestr: str): + self.sa.setName(namestr) + + def setFeatureNames(self, names: list): + self.sa.setFeatureNames(names) + + def name(self): + return self.sa.name() + + def featureNames(self): + return self.sa.featureNames() + + def shape(self): + return self.sa.shape() + + def hasNanOrInf(self): + return self.sa.hasNanOrInf() + + def readFromFile(self,filename): + dt = self.sa.readDtypeFromFile(filename) + self._setDtype(dt) + return self.sa.readFromFile(filename) + + def writeToFile(self,*args): + return self.sa.writeToFile(*args) + + def assignFromNumpy(self,*args): + return self.sa.assignFromNumpy(*args) + + def createFromNumpy(self, nparr, nprs=np.array([],dtype='int64')): + name = self.name() + fnames = self.featureNames() + self._setDtype(str(nparr.dtype)) + if nprs.dtype == 'int32': + self.sa.createFromNumpy(nparr, nprs.as_type('int64')) + else: + self.sa.createFromNumpy(nparr, nprs) + self.setName(name) + self.setFeatureNames(fnames) + + def copyToNumpy(self, pad_rowsplits=False): + return self.sa.copyToNumpy(pad_rowsplits) + + def transferToNumpy(self, pad_rowsplits=False): + return self.sa.transferToNumpy(pad_rowsplits) + + def isRagged(self,*args): + return self.sa.isRagged(*args) + + def split(self,*args): + spl = SimpleArray() + spl._setDtype(self.dtype) + spl.sa = self.sa.split(*args) + return spl + + def getSlice(self,*args): + spl = SimpleArray() + spl._setDtype(self.dtype) + spl.sa = self.sa.getSlice(*args) + return spl + + def append(self,other): + assert self.sa.dtypeI() == other.sa.dtypeI() + return self.sa.append(other.sa) + + def cout(self,*args): + return self.sa.cout(*args) + + def size(self,*args): + return self.sa.size(*args) + + def copy(self): + arr,rs = self.sa.copyToNumpy(False) + return SimpleArray(arr,rs) + diff --git a/TrainData.py b/TrainData.py index 934b66c..6753c40 100644 --- a/TrainData.py +++ b/TrainData.py @@ -2,22 +2,17 @@ Created on 20 Feb 2017 @author: jkiesele -''' - -from __future__ import print_function +New (post equals 2.1) version +''' -from Weighter import Weighter -from pdb import set_trace -import numpy +import os +import numpy as np import logging -import threading -import multiprocessing - -threadingfileandmem_lock=threading.Lock() -#threadingfileandmem_lock.release() -#multiproc_fileandmem_lock=multiprocessing.Lock() +from DeepJetCore.compiled.c_trainData import trainData +from DeepJetCore.SimpleArray import SimpleArray +import time def fileTimeOut(fileName, timeOut): ''' @@ -25,691 +20,102 @@ def 
fileTimeOut(fileName, timeOut): waits until the dir, the file should be stored in/read from, is accessible again, or the the timeout ''' - import os filepath=os.path.dirname(fileName) if len(filepath) < 1: filepath = '.' if os.path.isdir(filepath): return - import time + counter=0 print('file I/O problems... waiting for filesystem to become available for '+fileName) while not os.path.isdir(filepath): if counter > timeOut: - print('...file could not be opened within '+str(timeOut)+ ' seconds') + raise Exception('...file could not be opened within '+str(timeOut)+ ' seconds') counter+=1 time.sleep(1) +#inherit from cpp class, just slim wrapper -def _read_arrs_(arrwl,arrxl,arryl,doneVal,fileprefix,tdref=None,randomSeed=None): - import gc - gc.collect() - - import h5py - from sklearn.utils import shuffle - try: - idstrs=['w','x','y'] - h5f = h5py.File(fileprefix,'r') - alllists=[arrwl,arrxl,arryl] - for j in range(len(idstrs)): - fidstr=idstrs[j] - arl=alllists[j] - for i in range(len(arl)): - idstr=fidstr+str(i) - h5f[idstr].read_direct(arl[i]) - #shuffle each read-in, but each array with the same seed (keeps right asso) - if randomSeed: - arl[i]=shuffle(arl[i], random_state=randomSeed) - - doneVal.value=True - h5f.close() - del h5f - except Exception as d: - raise d - finally: - if tdref: - tdref.removeRamDiskFile() - - -class ShowProgress(object): - def __init__(self,nsteps,total): - self.nsteps=nsteps - self.total=total - self._stepvec=[] - for i in range(nsteps): - self._stepvec.append(float(i+1)*float(total)/float(nsteps)) - - self._counter=0 - - def show(self,index): - if index==0: - logging.info('0%') - if index>self._stepvec[self._counter]: - logging.info(str(int(float(index)/float(self.total)*100))+'%') - self._counter=self._counter+1 - - def reset(self): - self._counter=0 - - - - -class TrainData(object): +class TrainData(trainData): ''' Base class for batch-wise training of the DNN ''' - - - - def __init__(self): - ''' - Constructor - - ''' - - self.treename="" - self.undefTruth=[] - self.referenceclass='' - self.truthclasses=[] - self.allbranchestoberead=[] - - self.weightbranchX='' - self.weightbranchY='' - self.weight_binX = numpy.array([-1e12, 1e12],dtype=float) - self.weight_binY = numpy.array([-1e12, 1e12],dtype=float) - - self.reducedtruthclasses=[] - self.regressiontargetclasses=[] - - self.flatbranches=[] - self.branches=[] - self.branchcutoffs=[] - - self.readthread=None - self.readdone=None - - self.remove=True - self.weight=False + trainData.__init__(self) - self.clear() - - self.reduceTruth(None) - - def __del__(self): - self.readIn_abort() - self.clear() - - - def clear(self): - self.samplename='' - self.readIn_abort() - self.readthread=None - self.readdone=None - if hasattr(self, 'x'): - del self.x - del self.y - del self.w - if hasattr(self, 'w_list'): - del self.w_list - del self.x_list - del self.y_list - - self.x=[numpy.array([])] - self.y=[numpy.array([])] - self.w=[numpy.array([])] - - self.nsamples=None - def defineCustomPredictionLabels(self, labels): - self.customlabels=labels - def getInputShapes(self): - ''' - returns a list for each input shape. 
In most cases only one entry - ''' - outl=[] - for x in self.x: - outl.append(x.shape) - shapes=[] - for s in outl: - _sl=[] - for i in range(len(s)): - if i: - _sl.append(s[i]) - s=(_sl) - if len(s)==0: - s.append(1) - shapes.append(s) - - if hasattr(self,'generatePerBatch') and self.generatePerBatch: - shapes.append([len(self.generatePerBatch)]) - - return shapes - - def getTruthShapes(self): - outl=[len(self.getUsedTruth())] - return outl - - - def getNRegressionTargets(self): - if not self.regressiontargetclasses: - return 0 - return len(self.regressiontargetclasses) - - def getNClassificationTargets(self): - return len(self.getUsedTruth()) - - def addBranches(self, blist, cutoff=1): - self.branches.append(blist) - self.registerBranches(blist) - self.branchcutoffs.append(cutoff) - - def registerBranches(self,blist): - self.allbranchestoberead.extend(blist) - - def getUsedTruth(self): - if len(self.reducedtruthclasses) > 0: - return self.reducedtruthclasses - else: - return self.truthclasses - - - def reduceTruth(self, tuple_in=None): - self.reducedtruthclasses=self.truthclasses - if tuple_in is not None: - return numpy.array(tuple_in.tolist()) - - def writeOut(self,fileprefix): - - import h5py - fileTimeOut(fileprefix,120) - h5f = h5py.File(fileprefix, 'w') - - # try "lzf", too, faster, but less compression - def _writeoutListinfo(arrlist,fidstr,h5F): - arr=numpy.array([len(arrlist)]) - h5F.create_dataset(fidstr+'_listlength',data=arr) - for i in range(len(arrlist)): - idstr=fidstr+str(i) - h5F.create_dataset(idstr+'_shape',data=arrlist[i].shape) - - def _writeoutArrays(arrlist,fidstr,h5F): - for i in range(len(arrlist)): - idstr=fidstr+str(i) - arr=arrlist[i] - if "meta" in fileprefix[-4:]: - from DeepJetCore.compiled.c_readArrThreaded import writeArray - if arr.dtype!='float32': - arr=arr.astype('float32') - writeArray(arr.ctypes.data,fileprefix[:-4]+fidstr+'.'+str(i),list(arr.shape)) - else: - h5F.create_dataset(idstr, data=arr, compression="lzf") - + print('TrainData:getInputShapes: Deprecated, use getNumpyFeatureShapes instead') + return self.getNumpyFeatureShapes() - arr=numpy.array([self.nsamples],dtype='int') - h5f.create_dataset('n', data=arr) - - _writeoutListinfo(self.w,'w',h5f) - _writeoutListinfo(self.x,'x',h5f) - _writeoutListinfo(self.y,'y',h5f) - - _writeoutArrays(self.w,'w',h5f) - _writeoutArrays(self.x,'x',h5f) - _writeoutArrays(self.y,'y',h5f) - - h5f.close() - - - - def __createArr(self,shapeinfo): - import ctypes - import multiprocessing - fulldim=1 - for d in shapeinfo: - fulldim*=d - if fulldim < 0: #catch some weird things that happen when there is a file IO error - fulldim=0 - # reserve memory for array - shared_array_base = multiprocessing.RawArray(ctypes.c_float, int(fulldim)) - shared_array = numpy.ctypeslib.as_array(shared_array_base)#.get_obj()) - #print('giving shape',shapeinfo) - shared_array = shared_array.reshape(shapeinfo) - #print('gave shape',shapeinfo) - return shared_array - - def removeRamDiskFile(self): - if hasattr(self, 'ramdiskfile'): - import os - try: - if self.ramdiskfile and os.path.exists(self.ramdiskfile): - if "meta" in self.ramdiskfile[-4:]: - os.system('rm -f '+self.ramdiskfile[:-4]+"*") - else: - os.remove(self.ramdiskfile) - except OSError: - pass - self.ramdiskfile=None - - def readIn_async(self,fileprefix,read_async=True,shapesOnly=False,ramdiskpath='',randomseed=None): - - if self.readthread and read_async: - print('\nTrainData::readIn_async: started new read before old was finished. Intended? 
Waiting for first to finish...\n') - self.readIn_join() - - #print('read') - - import h5py - import multiprocessing - - #print('\ninit async read\n') - - fileTimeOut(fileprefix,120) - #print('\nfile access ok\n') - self.samplename=fileprefix - - - - def _readListInfo_(idstr): - sharedlist=[] - shapeinfos=[] - wlistlength=self.h5f[idstr+'_listlength'][0] - #print(idstr,'list length',wlistlength) - for i in range(wlistlength): - sharedlist.append(numpy.array([])) - iidstr=idstr+str(i) - shapeinfo=numpy.array(self.h5f[iidstr+'_shape']) - shapeinfos.append(shapeinfo) - return sharedlist, shapeinfos - - - with threadingfileandmem_lock: - try: - self.h5f = h5py.File(fileprefix,'r') - except: - raise IOError('File %s could not be opened properly, it may be corrupted' % fileprefix) - self.nsamples=self.h5f['n'] - self.nsamples=self.nsamples[0] - if True or not hasattr(self, 'w_shapes'): - self.w_list,self.w_shapes=_readListInfo_('w') - self.x_list,self.x_shapes=_readListInfo_('x') - self.y_list,self.y_shapes=_readListInfo_('y') - else: - print('\nshape known\n') - self.w_list,_=_readListInfo_('w') - self.x_list,_=_readListInfo_('x') - self.y_list,_=_readListInfo_('y') - - self.h5f.close() - del self.h5f - self.h5f=None - if shapesOnly: - return - - readfile=fileprefix - - isRamDisk=len(ramdiskpath)>0 - if isRamDisk: - import shutil - import uuid - import os - import copy - unique_filename='' - - unique_filename = ramdiskpath+'/'+str(uuid.uuid4())+'.z' - if "meta" in readfile[-4:]: - filebase=readfile[:-4] - unique_filename = ramdiskpath+'/'+str(uuid.uuid4()) - shutil.copyfile(filebase+'meta',unique_filename+'.meta') - for i in range(len(self.w_list)): - shutil.copyfile(filebase+'w.'+str(i),unique_filename+'.w.'+str(i)) - for i in range(len(self.x_list)): - shutil.copyfile(filebase+'x.'+str(i),unique_filename+'.x.'+str(i)) - for i in range(len(self.y_list)): - shutil.copyfile(filebase+'y.'+str(i),unique_filename+'.y.'+str(i)) - unique_filename+='.meta' - - else: - unique_filename = ramdiskpath+'/'+str(uuid.uuid4())+'.z' - shutil.copyfile(fileprefix, unique_filename) - readfile=unique_filename - self.ramdiskfile=readfile - - #create shared mem in sync mode - for i in range(len(self.w_list)): - self.w_list[i]=self.__createArr(self.w_shapes[i]) - - for i in range(len(self.x_list)): - self.x_list[i]=self.__createArr(self.x_shapes[i]) - - for i in range(len(self.y_list)): - self.y_list[i]=self.__createArr(self.y_shapes[i]) - - if read_async: - self.readdone=multiprocessing.Value('b',False) - - if read_async: - if "meta" in readfile[-4:]: - #new format - from DeepJetCore.compiled.c_readArrThreaded import startReading - self.readthreadids=[] - filebase=readfile[:-4] - for i in range(len(self.w_list)): - self.readthreadids.append(startReading(self.w_list[i].ctypes.data, - filebase+'w.'+str(i), - list(self.w_list[i].shape), - isRamDisk)) - for i in range(len(self.x_list)): - self.readthreadids.append(startReading(self.x_list[i].ctypes.data, - filebase+'x.'+str(i), - list(self.x_list[i].shape), - isRamDisk)) - for i in range(len(self.y_list)): - self.readthreadids.append(startReading(self.y_list[i].ctypes.data, - filebase+'y.'+str(i), - list(self.y_list[i].shape), - isRamDisk)) - - - else: - self.readthread=multiprocessing.Process(target=_read_arrs_, - args=(self.w_list, - self.x_list, - self.y_list, - self.readdone, - readfile, - self,randomseed)) - self.readthread.start() - else: - if "meta" in readfile[-4:]: - from DeepJetCore.compiled.c_readArrThreaded import readBlocking - filebase=readfile[:-4] - 
self.readthreadids=[] - for i in range(len(self.w_list)): - (readBlocking(self.w_list[i].ctypes.data, - filebase+'w.'+str(i), - list(self.w_list[i].shape), - isRamDisk)) - for i in range(len(self.x_list)): - (readBlocking(self.x_list[i].ctypes.data, - filebase+'x.'+str(i), - list(self.x_list[i].shape), - isRamDisk)) - for i in range(len(self.y_list)): - (readBlocking(self.y_list[i].ctypes.data, - filebase+'y.'+str(i), - list(self.y_list[i].shape), - isRamDisk)) - - else: - self.readdone=multiprocessing.Value('b',False) - _read_arrs_(self.w_list,self.x_list,self.y_list,self.readdone,readfile,self,randomseed) - - - - def readIn_abort(self): - self.removeRamDiskFile() - if not self.readthread: - return - self.readthread.terminate() - self.readthread=None - self.readdone=None - - def readIn_join(self,wasasync=True,waitforStart=True): - - try: - if not not hasattr(self, 'readthreadids') and not waitforStart and not self.readthread and wasasync: - print('\nreadIn_join:read never started\n') - - import time - if waitforStart: - while (not hasattr(self, 'readthreadids')) and not self.readthread: - time.sleep(0.1) - if hasattr(self, 'readthreadids'): - while not self.readthreadids: - time.sleep(0.1) - - counter=0 - - if hasattr(self, 'readthreadids') and self.readthreadids: - from DeepJetCore.compiled.c_readArrThreaded import isDone - doneids=[] - while True: - for id in self.readthreadids: - if id in doneids: continue - if isDone(id): - doneids.append(id) - if len(self.readthreadids) == len(doneids): - break - time.sleep(0.1) - counter+=1 - if counter>3000: #read failed. do synchronous read, safety option if threads died - print('\nfalling back to sync read\n') - self.readthread.terminate() - self.readthread=None - self.readIn(self.samplename) - return - - else: #will be removed at some point - while wasasync and (not self.readdone or not self.readdone.value): - if not self.readthread: - time.sleep(.1) - continue - self.readthread.join(.1) - counter+=1 - if counter>3000: #read failed. 
do synchronous read, safety option if threads died - print('\nfalling back to sync read\n') - self.readthread.terminate() - self.readthread=None - self.readIn(self.samplename) - return - if self.readdone.value: - self.readthread.join(.1) - - import copy - #move away from shared memory - #this costs performance but seems necessary - direct=False - with threadingfileandmem_lock: - if direct: - self.w=self.w_list - self.x=self.x_list - self.y=self.y_list - else: - self.w=copy.deepcopy(self.w_list) - self.x=copy.deepcopy(self.x_list) - self.y=copy.deepcopy(self.y_list) - - del self.w_list - del self.x_list - del self.y_list - #in case of some errors during read-in - - except Exception as d: - raise d - finally: - self.removeRamDiskFile() - - #check if this is really neccessary - def reshape_fast(arr,shapeinfo): - if len(shapeinfo)<2: - shapeinfo=numpy.array([arr.shape[0],1]) - arr=arr.reshape(shapeinfo) - return arr - - - for i in range(len(self.w)): - self.w[i]=reshape_fast(self.w[i],self.w_shapes[i]) - for i in range(len(self.x)): - self.x[i]=reshape_fast(self.x[i],self.x_shapes[i]) - for i in range(len(self.y)): - self.y[i]=reshape_fast(self.y[i],self.y_shapes[i]) - - self.w_list=None - self.x_list=None - self.y_list=None - if wasasync and self.readthread: - self.readthread.terminate() - self.readthread=None - self.readdone=None - def readIn(self,fileprefix,shapesOnly=False): - self.readIn_async(fileprefix,False,shapesOnly) - direct=True - if direct: - self.w=self.w_list - self.x=self.x_list - self.y=self.y_list + print('TrainData:readIn deprecated, use readFromFile') + self.readFromFile(fileprefix,shapesOnly) + + + def _convertToCppType(self,a,helptext): + saout=None + if str(type(a)) == "": + saout = a.sa + elif str(type(a)) == "" or str(type(a)) == "": + rs = np.array([]) + a = SimpleArray(a,rs) + saout = a.sa else: - import copy - self.w=copy.deepcopy(self.w_list) - del self.w_list - self.x=copy.deepcopy(self.x_list) - del self.x_list - self.y=copy.deepcopy(self.y_list) - del self.y_list - - def reshape_fast(arr,shapeinfo): - if len(shapeinfo)<2: - shapeinfo=numpy.array([arr.shape[0],1]) - if shapesOnly: - arr=numpy.zeros(shape=shapeinfo) - else: - arr=arr.reshape(shapeinfo) - return arr + raise ValueError("TrainData._convertToCppType MUST produce either a list of numpy arrays or a list of DeepJetCore simpleArrays!") - - + if saout.hasNanOrInf(): + raise ValueError("TrainData._convertToCppType: the "+helptext+" array "+saout.name()+" has NaN or inf entries") + return saout - for i in range(len(self.w)): - self.w[i]=reshape_fast(self.w[i],self.w_shapes[i]) - for i in range(len(self.x)): - self.x[i]=reshape_fast(self.x[i],self.x_shapes[i]) - for i in range(len(self.y)): - self.y[i]=reshape_fast(self.y[i],self.y_shapes[i]) + def _store(self, x, y, w): + for xa in x: + self.storeFeatureArray(self._convertToCppType(xa, "feature")) + x = [] #collect garbage + for ya in y: + self.storeTruthArray(self._convertToCppType(ya, "truth")) + y = [] + for wa in w: + self.storeWeightArray(self._convertToCppType(wa, "weight")) + w = [] - self.w_list=None - self.x_list=None - self.y_list=None - self.readthread=None + def readFromSourceFile(self,filename, weighterobjects={}, istraining=False, **kwargs): + x,y,w = self.convertFromSourceFile(filename, weighterobjects, istraining, **kwargs) + self._store(x,y,w) + + ################# functions to be defined by the user - def readTreeFromRootToTuple(self, filenames, limit=None, branches=None): + def createWeighterObjects(self, allsourcefiles): ''' - To be used 
to get the initial tupel for further processing in inherting classes - Makes sure the number of entries is properly set - - can also read a list of files (e.g. to produce weights/removes from larger statistics - (not fully tested, yet) + Will be called on the full list of source files once. + Can be used to create weighter objects or similar that can + then be applied to each individual conversion. + Should return a dictionary ''' - if branches==None: - branches=self.allbranchestoberead - - #print(branches) - #remove duplicates - usebranches=list(set(branches)) - tmpbb=[] - for b in usebranches: - if len(b): - tmpbb.append(b) - usebranches=tmpbb - - import ROOT - from root_numpy import tree2array, root2array - if isinstance(filenames, list): - for f in filenames: - fileTimeOut(f,120) - print('add files') - nparray = root2array( - filenames, - treename = self.treename, - stop = limit, - branches = usebranches - ) - print('done add files') - return nparray - print('add files') - else: - fileTimeOut(filenames,120) #give eos a minute to recover - rfile = ROOT.TFile(filenames) - tree = rfile.Get(self.treename) - if not self.nsamples: - self.nsamples=tree.GetEntries() - nparray = tree2array(tree, stop=limit, branches=usebranches) - return nparray - - def make_means(self, nparray): - from preprocessing import meanNormProd - return meanNormProd(nparray) - - def produceMeansFromRootFile(self,filename, limit=500000): - from preprocessing import meanNormProd - nparray = self.readTreeFromRootToTuple(filename, limit=limit) - means = self.make_means(nparray) - del nparray - return means - - #overload if necessary - def make_empty_weighter(self): - from Weighter import Weighter - weighter = Weighter() - weighter.undefTruth = self.undefTruth - - if self.remove or self.weight: - weighter.setBinningAndClasses( - [self.weight_binX,self.weight_binY], - self.weightbranchX,self.weightbranchY, - self.truthclasses - ) - return weighter - - - def produceBinWeighter(self,filenames): - weighter = self.make_empty_weighter() - branches = [self.weightbranchX,self.weightbranchY] - branches.extend(self.truthclasses) - showprog=ShowProgress(5,len(filenames)) - counter=0 - if self.remove or self.weight: - for fname in filenames: - nparray = self.readTreeFromRootToTuple(fname, branches=branches) - weighter.addDistributions(nparray) - del nparray - showprog.show(counter) - counter=counter+1 - weighter.createRemoveProbabilitiesAndWeights(self.referenceclass) - return weighter - - - - - def _normalize_input_(self, weighter, npy_array): - weights = None - if self.weight: - weights=weighter.getJetWeights(npy_array) - self.w = [weights for _ in self.y] - elif self.remove: - notremoves=weighter.createNotRemoveIndices(npy_array) - if self.undefTruth: - undef=npy_array[self.undefTruth].sum(axis=1) - notremoves-=undef - print(' to created remove indices') - weights=notremoves - - print('remove') - self.x = [x[notremoves > 0] for x in self.x] - self.y = [y[notremoves > 0] for y in self.y] - weights=weights[notremoves > 0] - self.w = [weights for _ in self.y] - newnsamp=self.x[0].shape[0] - print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%') - self.nsamples = newnsamp - else: - print('neither remove nor weight') - weights=numpy.empty(self.nsamples) - weights.fill(1.) - self.w = [weights for _ in self.y] - - + return {} + + ### perform a simple and quick check if the file is not corrupt. 
Can be called in advance to conversion + # return False if file is corrupt + def fileIsValid(self, filename): + return True + + ### either of the following need to be defined + + ## if direct writeout is useful + def writeFromSourceFile(self, filename, weighterobjects, istraining, outname): + self.readFromSourceFile(filename, weighterobjects, istraining) + self.writeToFile(outname) + + ## otherwise only define the conversion rule + # returns a list of numpy arrays OR simpleArray (mandatory for ragged tensors) + def convertFromSourceFile(self, filename, weighterobjects, istraining): + return [],[],[] + + ## defines how to write out the prediction + # must not use any of the stored arrays, only the inputs + # optionally it can return the output file name to be added to a list of output files + def writeOutPrediction(self, predicted, features, truth, weights, outfilename, inputfile): + return None diff --git a/TrainData_compat.py b/TrainData_compat.py new file mode 100644 index 0000000..a5205ed --- /dev/null +++ b/TrainData_compat.py @@ -0,0 +1,718 @@ +''' +Created on 20 Feb 2017 + +@author: jkiesele + + +Old (pre 2.0) version for conversions + +''' + +from __future__ import print_function + +from Weighter import Weighter +from pdb import set_trace +import os +import time +import numpy +import logging +import tempfile +import copy +import shutil +import threading +import multiprocessing + +#threadingfileandmem_lock=threading.Lock() +#threadingfileandmem_lock.release() +#multiproc_fileandmem_lock=multiprocessing.Lock() + +def fileTimeOut(fileName, timeOut): + ''' + simple wait function in case the file system has a glitch. + waits until the dir, the file should be stored in/read from, is accessible + again, or the the timeout + ''' + filepath=os.path.dirname(fileName) + if len(filepath) < 1: + filepath = '.' + if os.path.isdir(filepath): + return + + counter=0 + print('file I/O problems... 
waiting for filesystem to become available for '+fileName) + while not os.path.isdir(filepath): + if counter > timeOut: + print('...file could not be opened within '+str(timeOut)+ ' seconds') + counter+=1 + time.sleep(1) + + +def _read_arrs_(arrwl,arrxl,arryl,doneVal,fileprefix,tdref=None,randomSeed=None): + import gc + gc.collect() + + import h5py + from sklearn.utils import shuffle + try: + idstrs=['w','x','y'] + h5f = h5py.File(fileprefix,'r') + alllists=[arrwl,arrxl,arryl] + for j in range(len(idstrs)): + fidstr=idstrs[j] + arl=alllists[j] + for i in range(len(arl)): + idstr=fidstr+str(i) + h5f[idstr].read_direct(arl[i]) + #shuffle each read-in, but each array with the same seed (keeps right asso) + if randomSeed: + arl[i]=shuffle(arl[i], random_state=randomSeed) + + doneVal.value=True + h5f.close() + del h5f + except Exception as d: + raise d + finally: + if tdref: + tdref.removeRamDiskFile() + + +class ShowProgress(object): + def __init__(self,nsteps,total): + self.nsteps=nsteps + self.total=total + self._stepvec=[] + for i in range(nsteps): + self._stepvec.append(float(i+1)*float(total)/float(nsteps)) + + self._counter=0 + + def show(self,index): + if index==0: + logging.info('0%') + if index>self._stepvec[self._counter]: + logging.info(str(int(float(index)/float(self.total)*100))+'%') + self._counter=self._counter+1 + + def reset(self): + self._counter=0 + + + + +class TrainData(object): + ''' + Base class for batch-wise training of the DNN + ''' + + + + + def __init__(self): + ''' + Constructor + + ''' + + self.treename="" + self.undefTruth=[] + self.referenceclass='' + self.truthclasses=[] + self.allbranchestoberead=[] + + self.weightbranchX='' + self.weightbranchY='' + self.weight_binX = numpy.array([-1e12, 1e12],dtype=float) + self.weight_binY = numpy.array([-1e12, 1e12],dtype=float) + + self.reducedtruthclasses=[] + self.regressiontargetclasses=[] + + self.flatbranches=[] + self.branches=[] + self.branchcutoffs=[] + + self.readthread=None + self.readdone=None + + self.remove=True + self.weight=False + + self.clear() + + self.reduceTruth(None) + + def __del__(self): + self.readIn_abort() + self.clear() + + + def clear(self): + self.samplename='' + self.readIn_abort() + self.readthread=None + self.readdone=None + if hasattr(self, 'x'): + del self.x + del self.y + del self.w + if hasattr(self, 'w_list'): + del self.w_list + del self.x_list + del self.y_list + + self.x=[numpy.array([])] + self.y=[numpy.array([])] + self.w=[numpy.array([])] + + self.nsamples=None + + def skim(self, event=0): + xs=[] + ys=[] + ws=[] + + for x in self.x: + xs.append(x[event:event+1,...]) + for y in self.y: + ys.append(y[event:event+1,...]) + for w in self.w: + ws.append(w[event:event+1,...]) + self.clear() + self.nsamples=1 + self.x=xs + self.y=ys + self.w=ws + + def defineCustomPredictionLabels(self, labels): + self.customlabels=labels + + def getInputShapes(self): + ''' + returns a list for each input shape. 
In most cases only one entry + ''' + outl=[] + for x in self.x: + outl.append(x.shape) + shapes=[] + for s in outl: + _sl=[] + for i in range(len(s)): + if i: + _sl.append(s[i]) + s=(_sl) + if len(s)==0: + s.append(1) + shapes.append(s) + + if hasattr(self,'generatePerBatch') and self.generatePerBatch: + shapes.append([len(self.generatePerBatch)]) + + return shapes + + def getTruthShapes(self): + outl=[len(self.getUsedTruth())] + return outl + + + def getNRegressionTargets(self): + if not self.regressiontargetclasses: + return 0 + return len(self.regressiontargetclasses) + + def getNClassificationTargets(self): + return len(self.getUsedTruth()) + + def addBranches(self, blist, cutoff=1): + self.branches.append(blist) + self.registerBranches(blist) + self.branchcutoffs.append(cutoff) + + def registerBranches(self,blist): + self.allbranchestoberead.extend(blist) + + def getUsedTruth(self): + if len(self.reducedtruthclasses) > 0: + return self.reducedtruthclasses + else: + return self.truthclasses + + + def reduceTruth(self, tuple_in=None): + self.reducedtruthclasses=self.truthclasses + if tuple_in is not None: + return numpy.array(tuple_in.tolist()) + + def writeOut(self,fileprefix): + + import h5py + + #this is a workaround because hdf5 files written on eos are unreadable... + final_output_file=fileprefix + + # with h5py >= 2.9 you can directly write to an open tempfile, but for now + # we'd need to use tempfile as a safe name generator + #with tempfile.NamedTemporaryFile(suffix='.meta', delete=False) as t: + # h5f = h5py.File(t) + + t = tempfile.NamedTemporaryFile(suffix='.meta', delete=False) + t.close() + + h5f = h5py.File(t.name, 'w') + + # try "lzf", too, faster, but less compression + def _writeoutListinfo(arrlist,fidstr,h5F): + arr=numpy.array([len(arrlist)]) + h5F.create_dataset(fidstr+'_listlength',data=arr) + for i in range(len(arrlist)): + idstr=fidstr+str(i) + h5F.create_dataset(idstr+'_shape',data=arrlist[i].shape) + + def _writeoutArrays(arrlist,fidstr,h5F): + for i in range(len(arrlist)): + idstr=fidstr+str(i) + arr=arrlist[i] + from DeepJetCore.compiled.c_readArrThreaded import writeArray + if arr.dtype!='float32': + arr=arr.astype('float32') + writeArray(arr.ctypes.data,final_output_file[:-4]+fidstr+'.'+str(i),list(arr.shape)) + + arr=numpy.array([self.nsamples],dtype='int') + h5f.create_dataset('n', data=arr) + + _writeoutListinfo(self.w,'w',h5f) + _writeoutListinfo(self.x,'x',h5f) + _writeoutListinfo(self.y,'y',h5f) + + _writeoutArrays(self.w,'w',h5f) + _writeoutArrays(self.x,'x',h5f) + _writeoutArrays(self.y,'y',h5f) + + h5f.close() + + shutil.copyfile(t.name, final_output_file) + + def __createArr(self,shapeinfo): + return numpy.ascontiguousarray(numpy.zeros(shape=shapeinfo), dtype=numpy.float32) + + def removeRamDiskFile(self): + if hasattr(self, 'ramdiskfile'): + try: + if self.ramdiskfile and os.path.exists(self.ramdiskfile): + if "meta" in self.ramdiskfile[-4:]: + os.system('rm -f '+self.ramdiskfile[:-4]+"*") + else: + os.remove(self.ramdiskfile) + except OSError: + pass + self.ramdiskfile=None + + def readIn_async(self,fileprefix,read_async=True,shapesOnly=False,ramdiskpath='',randomseed=None): + + if self.readthread and read_async: + print('\nTrainData::readIn_async: started new read before old was finished. Intended? 
Waiting for first to finish...\n') + self.readIn_join() + + #print('read') + + import h5py + + #print('\ninit async read\n') + + fileTimeOut(fileprefix,120) + #print('\nfile access ok\n') + self.samplename=fileprefix + + + + def _readListInfo_(idstr): + sharedlist=[] + shapeinfos=[] + wlistlength=self.h5f[idstr+'_listlength'][0] + #print(idstr,'list length',wlistlength) + for i in range(wlistlength): + sharedlist.append(numpy.array([])) + iidstr=idstr+str(i) + shapeinfo=numpy.array(self.h5f[iidstr+'_shape']) + shapeinfos.append(shapeinfo) + return sharedlist, shapeinfos + + + + try: + self.h5f = h5py.File(fileprefix,'r') + except: + raise IOError('File %s could not be opened properly, it may be corrupted' % fileprefix) + self.nsamples=self.h5f['n'] + self.nsamples=self.nsamples[0] + if True or not hasattr(self, 'w_shapes'): + self.w_list,self.w_shapes=_readListInfo_('w') + self.x_list,self.x_shapes=_readListInfo_('x') + self.y_list,self.y_shapes=_readListInfo_('y') + else: + print('\nshape known\n') + self.w_list,_=_readListInfo_('w') + self.x_list,_=_readListInfo_('x') + self.y_list,_=_readListInfo_('y') + + self.h5f.close() + del self.h5f + self.h5f=None + if shapesOnly: + return + + readfile=fileprefix + + isRamDisk=len(ramdiskpath)>0 + if isRamDisk: + import uuid + unique_filename='' + + unique_filename = ramdiskpath+'/'+str(uuid.uuid4())+'.z' + if "meta" in readfile[-4:]: + filebase=readfile[:-4] + unique_filename = ramdiskpath+'/'+str(uuid.uuid4()) + shutil.copyfile(filebase+'meta',unique_filename+'.meta') + for i in range(len(self.w_list)): + shutil.copyfile(filebase+'w.'+str(i),unique_filename+'.w.'+str(i)) + for i in range(len(self.x_list)): + shutil.copyfile(filebase+'x.'+str(i),unique_filename+'.x.'+str(i)) + for i in range(len(self.y_list)): + shutil.copyfile(filebase+'y.'+str(i),unique_filename+'.y.'+str(i)) + unique_filename+='.meta' + + else: + unique_filename = ramdiskpath+'/'+str(uuid.uuid4())+'.z' + shutil.copyfile(fileprefix, unique_filename) + readfile=unique_filename + self.ramdiskfile=readfile + + #create shared mem in sync mode + for i in range(len(self.w_list)): + self.w_list[i]=self.__createArr(self.w_shapes[i]) + + for i in range(len(self.x_list)): + self.x_list[i]=self.__createArr(self.x_shapes[i]) + + for i in range(len(self.y_list)): + self.y_list[i]=self.__createArr(self.y_shapes[i]) + + + if read_async: + if "meta" in readfile[-4:]: + #new format + from DeepJetCore.compiled.c_readArrThreaded import startReading + self.readthreadids=[] + filebase=readfile[:-4] + for i in range(len(self.w_list)): + self.readthreadids.append(startReading(self.w_list[i].ctypes.data, + filebase+'w.'+str(i), + fileprefix, + list(self.w_list[i].shape), + isRamDisk)) + for i in range(len(self.x_list)): + self.readthreadids.append(startReading(self.x_list[i].ctypes.data, + filebase+'x.'+str(i), + fileprefix, + list(self.x_list[i].shape), + isRamDisk)) + for i in range(len(self.y_list)): + self.readthreadids.append(startReading(self.y_list[i].ctypes.data, + filebase+'y.'+str(i), + fileprefix, + list(self.y_list[i].shape), + isRamDisk)) + + else: + if "meta" in readfile[-4:]: + from DeepJetCore.compiled.c_readArrThreaded import readBlocking + filebase=readfile[:-4] + self.readthreadids=[] + for i in range(len(self.w_list)): + (readBlocking(self.w_list[i].ctypes.data, + filebase+'w.'+str(i), + fileprefix, + list(self.w_list[i].shape), + isRamDisk)) + for i in range(len(self.x_list)): + (readBlocking(self.x_list[i].ctypes.data, + filebase+'x.'+str(i), + fileprefix, + 
list(self.x_list[i].shape), + isRamDisk)) + for i in range(len(self.y_list)): + (readBlocking(self.y_list[i].ctypes.data, + filebase+'y.'+str(i), + fileprefix, + list(self.y_list[i].shape), + isRamDisk)) + + + + def readIn_abort(self): + self.removeRamDiskFile() + if not self.readthread: + return + self.readthread.terminate() + self.readthread=None + self.readdone=None + + def readIn_join(self,wasasync=True,waitforStart=True): + + try: + if not not hasattr(self, 'readthreadids') and not waitforStart and not self.readthread and wasasync: + print('\nreadIn_join:read never started\n') + + if waitforStart: + while (not hasattr(self, 'readthreadids')) and not self.readthread: + time.sleep(0.1) + if hasattr(self, 'readthreadids'): + while not self.readthreadids: + time.sleep(0.1) + + counter=0 + + if hasattr(self, 'readthreadids') and self.readthreadids: + from DeepJetCore.compiled.c_readArrThreaded import isDone + doneids=[] + while True: + for id in self.readthreadids: + if id in doneids: continue + if isDone(id): + doneids.append(id) + if len(self.readthreadids) == len(doneids): + break + time.sleep(0.1) + counter+=1 + if counter>3000: #read failed. do synchronous read, safety option if threads died + print('\nfalling back to sync read\n') + self.readthread.terminate() + self.readthread=None + self.readIn(self.samplename) + return + + + + #move away from shared memory + #this costs performance but seems necessary + + self.w=copy.deepcopy(self.w_list) + self.x=copy.deepcopy(self.x_list) + self.y=copy.deepcopy(self.y_list) + + del self.w_list + del self.x_list + del self.y_list + #in case of some errors during read-in + + except Exception as d: + raise d + finally: + self.removeRamDiskFile() + + #check if this is really neccessary + def reshape_fast(arr,shapeinfo): + if len(shapeinfo)<2: + shapeinfo=numpy.array([arr.shape[0],1]) + arr=arr.reshape(shapeinfo) + return arr + + + for i in range(len(self.w)): + self.w[i]=reshape_fast(self.w[i],self.w_shapes[i]) + for i in range(len(self.x)): + self.x[i]=reshape_fast(self.x[i],self.x_shapes[i]) + for i in range(len(self.y)): + self.y[i]=reshape_fast(self.y[i],self.y_shapes[i]) + + self.w_list=None + self.x_list=None + self.y_list=None + if wasasync and self.readthread: + self.readthread.terminate() + self.readthread=None + self.readdone=None + + def readIn(self,fileprefix,shapesOnly=False): + self.readIn_async(fileprefix,False,shapesOnly) + direct=True + if direct: + self.w=self.w_list + self.x=self.x_list + self.y=self.y_list + else: + self.w=copy.deepcopy(self.w_list) + del self.w_list + self.x=copy.deepcopy(self.x_list) + del self.x_list + self.y=copy.deepcopy(self.y_list) + del self.y_list + + def reshape_fast(arr,shapeinfo): + if len(shapeinfo)<2: + shapeinfo=numpy.array([arr.shape[0],1]) + if shapesOnly: + arr=numpy.zeros(shape=shapeinfo) + else: + arr=arr.reshape(shapeinfo) + return arr + + + + + for i in range(len(self.w)): + self.w[i]=reshape_fast(self.w[i],self.w_shapes[i]) + for i in range(len(self.x)): + self.x[i]=reshape_fast(self.x[i],self.x_shapes[i]) + for i in range(len(self.y)): + self.y[i]=reshape_fast(self.y[i],self.y_shapes[i]) + + self.w_list=None + self.x_list=None + self.y_list=None + self.readthread=None + + + def readTreeFromRootToTuple(self, filenames, limit=None, branches=None): + ''' + To be used to get the initial tupel for further processing in inherting classes + Makes sure the number of entries is properly set + + can also read a list of files (e.g. 
to produce weights/removes from larger statistics + (not fully tested, yet) + ''' + if branches==None: + branches=self.allbranchestoberead + + if branches is None or len(branches) == 0: + return numpy.array([],dtype='float32') + + #print(branches) + #remove duplicates + usebranches=list(set(branches)) + tmpbb=[] + for b in usebranches: + if len(b): + tmpbb.append(b) + usebranches=tmpbb + + import ROOT + from root_numpy import tree2array, root2array + if isinstance(filenames, list): + for f in filenames: + fileTimeOut(f,120) + print('add files') + nparray = root2array( + filenames, + treename = self.treename, + stop = limit, + branches = usebranches + ) + print('done add files') + return nparray + print('add files') + else: + fileTimeOut(filenames,120) #give eos a minute to recover + rfile = ROOT.TFile(filenames) + tree = rfile.Get(self.treename) + if not self.nsamples: + self.nsamples=tree.GetEntries() + nparray = tree2array(tree, stop=limit, branches=usebranches) + return nparray + + def read_truthclasses(self,filename): + npy_array = self.readTreeFromRootToTuple(filename) + arl=[] + for c in self.truthclasses: + a = numpy.asarray(npy_array[c]) + a = a.reshape((a.shape[0],1)) + arl.append(a) + + return numpy.concatenate(arl,axis=-1) + + def make_means(self, nparray): + from preprocessing import meanNormProd + return meanNormProd(nparray) + + def produceMeansFromRootFile(self,filename, limit=500000): + from preprocessing import meanNormProd + nparray = self.readTreeFromRootToTuple(filename, limit=limit) + means = numpy.array([],dtype='float32') + if len(nparray): + means = self.make_means(nparray) + + del nparray + return means + + #overload if necessary + def make_empty_weighter(self): + weighter = Weighter() + weighter.undefTruth = self.undefTruth + + if self.remove or self.weight: + weighter.setBinningAndClasses( + [self.weight_binX,self.weight_binY], + self.weightbranchX,self.weightbranchY, + self.truthclasses + ) + return weighter + + + def produceBinWeighter(self,filenames): + weighter = self.make_empty_weighter() + branches = [self.weightbranchX,self.weightbranchY] + branches.extend(self.truthclasses) + showprog=ShowProgress(5,len(filenames)) + counter=0 + if self.remove or self.weight: + for fname in filenames: + fileTimeOut(fname, 120) + nparray = self.readTreeFromRootToTuple(fname, branches=branches) + weighter.addDistributions(nparray) + #del nparray + showprog.show(counter) + counter=counter+1 + weighter.createRemoveProbabilitiesAndWeights(self.referenceclass) + return weighter + + + + + def _normalize_input_(self, weighter, npy_array, oversample=1): + weights = None + if self.weight: + weights=weighter.getJetWeights(npy_array) + self.w = [weights for _ in self.y] + elif self.remove: + x_in=self.x + y_in=self.y + for i in range(oversample): + notremoves=weighter.createNotRemoveIndices(npy_array) + if self.undefTruth: + undef=npy_array[self.undefTruth].sum(axis=1) + notremoves-=undef + print(' to created remove indices', i) + weights=notremoves + + print('remove', i) + if not i: + self.x = [x[notremoves > 0] for x in x_in] + self.y = [y[notremoves > 0] for y in y_in] + else: + self.x = [self.x[i].concatenate(x_in[i][notremoves > 0]) for i in range(len(self.x))] + self.y = [self.y[i].concatenate(y_in[i][notremoves > 0]) for i in range(len(self.y))] + + self.w = [numpy.zeros(self.x[0].shape)+1 for _ in self.y] + + + newnsamp=self.x[0].shape[0] + print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%') + self.nsamples = newnsamp + else: + print('neither 
remove nor weight') + weights=numpy.empty(self.nsamples) + weights.fill(1.) + self.w = [weights for _ in self.y] + + + diff --git a/Weighter.py b/Weighter.py index f47d5fc..663bc7d 100644 --- a/Weighter.py +++ b/Weighter.py @@ -1,10 +1,10 @@ ''' Created on 26 Feb 2017 - @author: jkiesele ''' from __future__ import print_function +import numpy as np import matplotlib #if no X11 use below @@ -23,21 +23,36 @@ def __init__(self): self.removeProbabilties=[] self.binweights=[] self.distributions=[] - self.xedges=[] - self.yedges=[] + self.red_distributions=[] + self.xedges=[np.array([])] + self.yedges=[np.array([])] self.classes=[] + self.red_classes=[] + self.class_weights=[] self.refclassidx=0 self.undefTruth=[] - + self.truth_red_fusion = [] + def __eq__(self, other): 'A == B' + def _all(x): + if hasattr(x, 'all'): + return x.all() + if hasattr(x, '__iter__'): + return all(x) + else: return x + def comparator(this, that): 'compares lists of np arrays' - return all((i == j).all() for i,j in zip(this, that)) + return _all((i == j).all() for i,j in zip(this, that)) + + #empty + if len(self.Axixandlabel) == len(other.Axixandlabel) and len(self.Axixandlabel) == 0: + return True return self.Axixandlabel == other.Axixandlabel and \ - all(self.axisX == other.axisX) and \ - all(self.axisY == other.axisY) and \ + _all(self.axisX == other.axisX) and \ + _all(self.axisY == other.axisY) and \ comparator(self.hists, other.hists) and \ comparator(self.removeProbabilties, other.removeProbabilties) and \ self.classes == other.classes and \ @@ -45,24 +60,35 @@ def comparator(this, that): self.undefTruth == other.undefTruth and \ comparator(self.binweights, other.binweights) and \ comparator(self.distributions, other.distributions) and \ - (self.xedges == other.xedges).all() and \ - (self.yedges == other.yedges).all() + _all(self.xedges == other.xedges) and \ + _all(self.yedges == other.yedges) def __ne__(self, other): 'A != B' return not (self == other) - def setBinningAndClasses(self,bins,nameX,nameY,classes): + def setBinningAndClasses(self,bins,nameX,nameY,classes, red_classes = [-1], truth_red_fusion = [-1], method='isB'): + + if method == 'flatten' and red_classes == [-1]: + raise Exception('You didnt define the reduced classes for the flatten method correctly. Create a list with your reduced classes and call it in the setBinningAndClasses function with red_classes = ') + if method == 'flatten' and truth_red_fusion == [-1]: + raise Exception('You didnt define the fusion for the truth classes for the flatten method correctly. Create a list where each entry is also a list with all the truth classes to fusion into a reduced class. The entries of the reduced classes and fusion list must follow the same order, ie : the truth classes to fusion for the first reduced class is the first element of your fusion list. 
Then call it in the setBinningAndClasses function with thruth_red_fusion = ') + self.axisX= bins[0] self.axisY= bins[1] self.nameX=nameX self.nameY=nameY self.classes=classes + self.red_classes = red_classes + self.truth_red_fusion = truth_red_fusion if len(self.classes)<1: self.classes=[''] + if len(self.red_classes)<1: + self.red_classes=[''] + if len(self.truth_red_fusion)<1: + self.truth_red_fusion=[''] - def addDistributions(self,Tuple): - import numpy + def addDistributions(self,Tuple, norm_h = True): selidxs=[] ytuple=Tuple[self.nameY] @@ -75,49 +101,52 @@ def addDistributions(self,Tuple): for c in self.classes: selidxs.append(labeltuple[c]>0) else: - selidxs=[numpy.zeros(len(xtuple),dtype='int')<1] - - - for i in range(len(self.classes)): - tmphist,xe,ye=numpy.histogram2d(xtuple[selidxs[i]],ytuple[selidxs[i]],[self.axisX,self.axisY],normed=True) + selidxs=[np.zeros(len(xtuple),dtype='int')<1] + + for i, label in enumerate(self.classes): + #print('axis-X binning :') + #print(self.axisX) + #print('axis-Y binning :') + #print(self.axisY) + tmphist,xe,ye=np.histogram2d(xtuple[selidxs[i]],ytuple[selidxs[i]],[self.axisX,self.axisY],normed=norm_h) self.xedges=xe self.yedges=ye if len(self.distributions)==len(self.classes): self.distributions[i]=self.distributions[i]+tmphist else: self.distributions.append(tmphist) - + def printHistos(self,outdir): - import numpy - def plotHist(hist,outname): + def plotHist(hist,outname, histname): import matplotlib.pyplot as plt H=hist.T - fig = plt.figure() - ax = fig.add_subplot(111) - X, Y = numpy.meshgrid(self.xedges, self.yedges) - ax.pcolormesh(X, Y, H) + fig, ax0 = plt.subplots() + X, Y = np.meshgrid(self.xedges, self.yedges) + im = ax0.pcolormesh(X, Y, H) + #fig.colorbar(im, ax=ax) if self.axisX[0]>0: - ax.set_xscale("log", nonposx='clip') + ax0.set_xscale("log", nonposx='clip') else: - ax.set_xlim([self.axisX[1],self.axisX[-1]]) - ax.set_xscale("log", nonposx='mask') - #plt.colorbar() + ax0.set_xlim([self.axisX[1],self.axisX[-1]]) + ax0.set_xscale("log", nonposx='mask') + plt.colorbar(im, ax = ax0) + ax0.set_title(histname) fig.savefig(outname) plt.close() - for i in range(len(self.classes)): - if len(self.distributions): - plotHist(self.distributions[i],outdir+"/dist_"+self.classes[i]+".pdf") - plotHist(self.removeProbabilties[i] ,outdir+"/remprob_"+self.classes[i]+".pdf") - plotHist(self.binweights[i],outdir+"/weights_"+self.classes[i]+".pdf") - reshaped=self.distributions[i]*self.binweights[i] - plotHist(reshaped,outdir+"/reshaped_"+self.classes[i]+".pdf") - + for i in range(len(self.red_classes)): + if len(self.red_distributions): + plotHist(self.red_distributions[i],outdir+"/dist_"+self.red_classes[i]+".png",self.red_classes[i]+" distribution") + #plotHist(self.removeProbabilties[i] ,outdir+"/remprob_"+self.classes[i]+".pdf") + #plotHist(self.binweights[i],outdir+"/weights_"+self.classes[i]+".pdf") + #reshaped=self.distributions[i]*self.binweights[i] + #plotHist(reshaped,outdir+"/reshaped_"+self.classes[i]+".pdf") def createRemoveProbabilitiesAndWeights(self,referenceclass='isB'): - import numpy + referenceidx=-1 - if not referenceclass=='flatten': + + if referenceclass != 'flatten': try: referenceidx=self.classes.index(referenceclass) except: @@ -131,16 +160,30 @@ def createRemoveProbabilitiesAndWeights(self,referenceclass='isB'): self.refclassidx=referenceidx - refhist=numpy.zeros((len(self.axisX)-1,len(self.axisY)-1), dtype='float32') + refhist=np.zeros((len(self.axisX)-1,len(self.axisY)-1), dtype='float32') refhist += 1 if referenceidx 
>= 0: refhist=self.distributions[referenceidx] - refhist=refhist/numpy.amax(refhist) - + refhist=refhist/np.amax(refhist) + + if referenceclass == 'flatten': + temp = [] + for k in range(len(self.red_classes)): + temp.append(0) + for i, label in enumerate(self.classes): + if label in self.truth_red_fusion[k]: + temp[k] = temp[k] + self.distributions[i] + + for j in range(len(temp)): + threshold_ = np.median(temp[j][temp[j] > 0]) * 0.01 + nonzero_vals = temp[j][temp[j] > threshold_] + ref_val = np.percentile(nonzero_vals, 25) + + self.red_distributions = temp def divideHistos(a,b): - out=numpy.array(a) + out=np.array(a) for i in range(a.shape[0]): for j in range(a.shape[1]): if b[i][j]: @@ -149,46 +192,80 @@ def divideHistos(a,b): out[i][j]=-10 return out + reweight_threshold = 15 + max_weight = 1 + raw_hists = {} + class_events = {} + result = {} + probhists=[] weighthists=[] + + if referenceclass=='flatten': + for i, label in enumerate(self.red_classes): + raw_hists[label] = self.red_distributions[i].astype('float32') + result[label] = self.red_distributions[i].astype('float32') + + for label, classwgt in zip(self.red_classes, self.class_weights): + hist = result[label] + threshold_ = np.median(hist[hist > 0]) * 0.01 + nonzero_vals = hist[hist > threshold_] + ref_val = np.percentile(nonzero_vals, reweight_threshold) + # wgt: bins w/ 0 elements will get a weight of 0; bins w/ content 0: - print "You should have a valid grid proxy to run this!" - exit() +if len(args.c)<1: + print("please specify and output class") + exit(-1) + + +deep_jet_base = os.environ['DEEPJETCORE_SUBPACKAGE'] +if len(deep_jet_base) < 1: + raise RuntimeError('I cannot find the project root directory. DEEPJETCORE_SUBPACKAGE needs to be defined') -if not os.path.isdir(args.batch_dir): - os.mkdir(args.batch_dir) + +deep_jet_base_name = os.path.basename(deep_jet_base) +deep_jet_core = os.path.abspath((os.environ['DEEPJETCORE'])) + + +if os.path.isdir(args.out): + print ("output dir must not exists") + exit(-2) + +if os.path.isdir(args.batch_dir): + print ("batch dir must not exists") +os.mkdir(args.batch_dir) if not os.path.isdir('%s/batch' % args.batch_dir): - os.mkdir('%s/batch' % args.batch_dir) - -if not (args.nomeans or args.testdatafor): - #Run a fisrt round of root conversion to get the means/std and weights - cmd = [ - './convertFromRoot.py', - '-i', args.infile, - '-c', args.c, - '-o', args.out, - '--means' - ] - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = proc.communicate() - code = proc.wait() + os.mkdir('%s/batch' % args.batch_dir) - if code != 0: - raise RuntimeError('The first round of root conversion failed with message: \n\n%s' % err) +if not (len(args.meansfrom) or args.testdatafor): + #Run a fisrt round of root conversion to get the means/std and weights + print('creating a dummy datacollection for means/norms and weighter (can take a while)...') + + from DeepJetCore.DataCollection import DataCollection + from DeepJetCore.conversion.conversion import class_options + + try: + cls = class_options[args.c] + except KeyError: + raise Exception('wrong class selection') -inputs = [i for i in open(args.infile)] + if not args.classArgs: + args.classArgs = tuple() -def chunkify(l, n): - """Yield successive n-sized chunks from l.""" - for i in range(0, len(l), n): - yield l[i:i + n] + dc = DataCollection(nprocs=-1) + dc.meansnormslimit = int(args.nforweighter) + try: + dc.convertListOfRootFiles(args.infile, cls(*args.classArgs), args.out, + means_only=True, + 
output_name='batch_template.dc', + relpath=('' if args.noRelativePaths else os.path.dirname(os.path.realpath(args.infile))) + ) + + except: + print 'The first round of root conversion failed' + raise + + print('means/norms/weighter produced successfully') + +elif args.meansfrom: + if not os.path.exists(args.meansfrom): + raise Exception("The file "+args.meansfrom+" does not exist") + print('using means/weighter from '+args.meansfrom) + os.mkdir(args.out) + os.system('cp '+args.meansfrom+' '+args.out+'/batch_template.dc') if not args.infile.endswith('.txt'): - raise ValueError('The code assumes that the input files has .txt extension') - -txt_template = args.infile.replace('.txt', '.%s.txt') -batch_txts = [] -nchunks = 0 -for idx, chunk in enumerate(chunkify(inputs, len(inputs)/args.nchunks)): - name = txt_template % idx - batch_txts.append(name) - with open(name, 'w') as cfile: - cfile.write(''.join(chunk)) - nchunks = idx + raise ValueError('The code assumes that the input files has .txt extension') + +with open(args.infile) as source: + num_inputs = len(source.read().split('\n')) + +chunk_size = num_inputs / args.nchunks + +print('splitting input file...') +range_indices = [] + +for idx, start in enumerate(range(0, num_inputs, chunk_size)): + range_indices.append((idx, start, start + chunk_size)) batch_template = '''#!/bin/bash -sleep $(shuf -i1-600 -n1) #sleep a random amount of time between 1s and 10' to avoid bottlenecks in reaching afs +#sleep $(shuf -i1-300 -n1) #sleep a random amount of time between 1s and 10' to avoid bottlenecks in reaching afs echo "JOBSUB::RUN job running" trap "echo JOBSUB::FAIL job killed" SIGTERM -cd {DJ}/environment/ -source lxplus_env.sh -cd {DJ}/convertFromRoot/ -./convertFromRoot.py "$@" +BASEDIR=`pwd` +cd {subpackage} +source env.sh +convertFromSource.py "$@" exitstatus=$? 
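The splitting step above turns the input file list into (job index, start, stop) triplets; each triplet later becomes one "JOBIDX START STOP" line in submit.sub and is passed to convertFromSource.py as --inRange START STOP, where START is the first input line kept and STOP is exclusive. A minimal sketch of that bookkeeping, assuming the chunk size is meant to stay an integer (// rather than /):

    # illustrative numbers only
    num_inputs = 1000                    # lines in the input .txt file list
    nchunks = 10                         # requested number of batch jobs
    chunk_size = num_inputs // nchunks   # integer division assumed here
    range_indices = [(idx, start, start + chunk_size)
                     for idx, start in enumerate(range(0, num_inputs, chunk_size))]
    # -> [(0, 0, 100), (1, 100, 200), ...]; job 0 converts input lines 0..99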
if [ $exitstatus != 0 ] then @@ -87,31 +118,46 @@ def chunkify(l, n): else echo JOBSUB::SUCC job ended sucessfully fi -'''.format(DJ=deep_jet_base) +'''.format(subpackage=deep_jet_base) batch_script = '%s/batch.sh' % args.batch_dir with open(batch_script, 'w') as bb: - bb.write(batch_template) + bb.write(batch_template) + +options = [] +if args.noRelativePaths: + options.append('--noRelativePaths') +if args.testdatafor: + options.append('--testdatafor ' + args.testdatafor) +else: + options.append('--usemeansfrom %s/batch_template.dc' % os.path.realpath(args.out)) + +option = ' '.join(options) -means_file = '%s/batch_template.dc' % os.path.realpath(args.out) if not args.testdatafor else args.testdatafor -option = '--usemeansfrom' if not args.testdatafor else '--testdatafor' with open('%s/submit.sub' % args.batch_dir, 'w') as bb: - bb.write(''' -executable = {EXE} -arguments = -i {INFILE} -c {CLASS} -o {OUT} --nothreads --batch conversion.$(ProcId).dc {OPTION} {MEANS} -output = batch/con_out.$(ProcId).out -error = batch/con_out.$(ProcId).err -log = batch/con_out.$(ProcId).log -send_credential = True + bb.write('''executable = {EXE} +arguments = -i {INFILE} --inRange $(START) $(STOP) -c {CLASS} -o {OUT} --nothreads --batch conversion.$(JOBIDX).dc {OPTION} +output = {BATCH_DIR}/batch/con_out.$(JOBIDX).out +error = {BATCH_DIR}/batch/con_out.$(JOBIDX).err +log = {BATCH_DIR}/batch/con_out.$(JOBIDX).log +#+MaxRuntime = 86399 ++JobFlavour = "{FLAVOUR}" getenv = True -use_x509userproxy = True -queue {NJOBS} -'''.format( - EXE = os.path.realpath(batch_script), - NJOBS = nchunks, - INFILE = txt_template % '$(ProcId)', - CLASS = args.c, - OUT = os.path.realpath(args.out), - OPTION = option, - MEANS = means_file, +#use_x509userproxy = True +accounting_group = {ACCTGRP} ++AccountingGroup = {ACCTGRP} +queue JOBIDX START STOP from ( +{RANGE_INDICES} ) - ) +'''.format( + EXE = os.path.realpath(batch_script), + INFILE = args.infile, + CLASS = args.c, + OUT = os.path.realpath(args.out), + OPTION = option, + BATCH_DIR = args.batch_dir, + FLAVOUR = args.jobFlavour, + ACCTGRP = 'group_u_CMST3.all' if args.cmst3 else 'group_u_CMS.u_zh', + RANGE_INDICES = '\n'.join('%d %d %d' % rng for rng in range_indices) +)) + +print('condor submit file can be found in '+ args.batch_dir+'\nuse check_conversion.py ' + args.batch_dir + ' to to check jobs') diff --git a/bin/check_conversion.py b/bin/check_conversion.py index 93ce4fb..9feccee 100755 --- a/bin/check_conversion.py +++ b/bin/check_conversion.py @@ -1,4 +1,4 @@ -#! /bin/env python +#! 
/bin/env python3 from argparse import ArgumentParser from pdb import set_trace @@ -7,51 +7,98 @@ import os def grep(fname, pattern): - with open(fname) as infile: - for line in infile: - if pattern in line: - return True - return False + with open(fname) as infile: + for line in infile: + if pattern in line: + return True + return False -parser = ArgumentParser('program to convert root tuples to traindata format') +parser = ArgumentParser('program to check batch conversion of root tuples to traindata format') parser.add_argument("indir", help="input dir of the batch task", metavar="FILE") args = parser.parse_args() -sub_lines = [i for i in open('%s/submit.sub' % args.indir)] -general_lines = [] -proc_lines = [] -for line in sub_lines: - if '$(ProcId)' in line: - proc_lines.append(line.replace('$(ProcId)', '{IDX}')) - elif 'queue ' in line: - pass #nothing to be done - else: - general_lines.append(line) -proc_lines.append('queue\n') -proc_lines = ''.join(proc_lines) - outputs = glob.glob('%s/batch/con_out.*.out' % args.indir) +if len(outputs)<1: + print('no jobs submitted, please check') + exit(-1) + failed = [i for i in outputs if not grep(i, 'JOBSUB::SUCC')] +successful=[ i.split(".")[-2] for i in outputs if grep(i, 'JOBSUB::SUCC')] + +def get_output_dir(): + batch_args = [i for i in open('%s/submit.sub' % args.indir) if 'arguments' in i][0] + batch_args = batch_args.split('=')[1].split(' ') + output_dir = [batch_args[i+1] for i in range(len(batch_args)) if batch_args[i].startswith('-o')][0] + return output_dir + +def merge_successful(): + output_dir=get_output_dir() + from DeepJetCore.DataCollection import DataCollection + alldc=[] + for s in successful: + in_path=output_dir+'/conversion.'+str(s)+'.dc' + dc=None + try: + dc=DataCollection(in_path) + except Exception as e: + print('problems adding '+in_path+" will continue nevertheless... (error see below)") + print(e) + dc=0 + if dc: + alldc.append(DataCollection(in_path)) + print("merging DataCollections") + merged = alldc[0] + merged_c=1 + for i in range(1,len(alldc)): + try: + merged += alldc[i] + merged_c+=1 + except Exception as e: + print(e) + print('...continue adding nevertheless') + + if merged_c != len(alldc): + print('lost '+str(100* (1. - float(merged_c)/float(len(alldc)))) +'%') + print("saving merged DataCollection") + merged.writeToFile('%s/dataCollection.dc' % output_dir) + print('successfully merged to %s/dataCollection.dc' % output_dir) + return merged if len(failed) == 0: - print 'All jobs successfully completed, merging...' - from DataCollection import DataCollection - from glob import glob - batch_args = [i for i in open('%s/submit.sub' % args.indir) if 'arguments' in i][0] - batch_args = batch_args.split('=')[1].split('-') - output_dir = [i for i in batch_args if i.startswith('o ')][0].split(' ')[1] - merged = sum(DataCollection(i) for i in glob('%s/conversion.*.dc' % output_dir)) - dname = os.path.dirname(merged.originRoots[0]) - infiles = glob('%s/*.root' % dname) - if len(infiles) != len(merged.originRoots): - print '\n\n\nThere are missing files that were not converted, maybe something went wrong!\n\n\n' - merged.writeToFile('%s/dataCollection.dc' % output_dir) + print 'All jobs successfully completed, merging...' 
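In practice the files generated above form a simple submit-check-resubmit cycle: submit <batch_dir>/submit.sub with condor_submit (or the site's equivalent), then run check_conversion.py <batch_dir>. If all jobs report JOBSUB::SUCC, the per-job conversion.N.dc outputs are merged into a single dataCollection.dc; otherwise the script can write a rescue.sub for the failed job indices (see below), which is submitted the same way.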
+ merged=merge_successful() + dname = os.path.dirname(merged.originRoots[0]) + infiles = glob.glob('%s/*.root' % dname) + if len(infiles) != len(merged.originRoots): + print '\n\n\nThere are missing files that were not converted, maybe something went wrong!\n\n\n' + else: - keep_going = raw_input('%d/%d jobs have failed, should I recover them? [yY/nN] ' % (len(failed), len(outputs))) - if keep_going.lower() == 'n': exit(0) - - idxs = [os.path.basename(i).split('.')[1] for i in failed] - with open('%s/rescue.sub' % args.indir, 'w') as jdl: - jdl.write(''.join(general_lines)) - jdl.write('\n'.join([proc_lines.format(IDX=i) for i in idxs])) - print 'rescue file created' + keep_going = raw_input('%d/%d jobs have failed, should I recover them? [yY/nN] ' % (len(failed), len(outputs))) + if keep_going.lower() == 'n': + merge_anyways = raw_input('Should I merge the sucessfully converted files (%d)? [yY/nN] ' % len(successful)) + if merge_anyways.lower() == 'n': exit(0) + merge_successful() + exit(0) + + general_lines = [] + proc_lines = [] + + with open('%s/submit.sub' % args.indir) as submit_file: + parse_procs = False + for line in submit_file: + + if parse_procs: + if line.startswith(')'): + break + proc_lines.append(line.strip()) + else: + general_lines.append(line) + if 'queue ' in line: + parse_procs = True + + idxs = [int(os.path.basename(i).split('.')[1]) for i in failed] + with open('%s/rescue.sub' % args.indir, 'w') as jdl: + jdl.write(''.join(general_lines)) + jdl.write('\n'.join(proc_lines[i] for i in idxs)) + jdl.write('\n)\n') + print 'rescue file created' diff --git a/bin/convertDCFromPreviousMinorVersion.py b/bin/convertDCFromPreviousMinorVersion.py new file mode 100755 index 0000000..a9b3167 --- /dev/null +++ b/bin/convertDCFromPreviousMinorVersion.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +''' + +@author: jkiesele + +''' + +from argparse import ArgumentParser +import os +parser = ArgumentParser('simple program to convert old data set to the new format') +parser.add_argument("infile", help="input \"dc\" file") +parser.add_argument("--in_place", help="replace files in place: warning, no backups are created",default=False, action="store_true") +# process options +args=parser.parse_args() + + + +from DeepJetCore import DataCollection, TrainData +infile=args.infile + +dc=DataCollection(infile) +inpath = dc.dataDir + +insamples = [dc.getSamplePath(s) for s in dc.samples] + +for s in insamples: + if not args.in_place: + os.system('cp '+s+' '+s+'.backup') + td=TrainData() + td.readFromFile(s) + td.writeToFile(s) \ No newline at end of file diff --git a/bin/convertDCtoNumpy.py b/bin/convertDCtoNumpy.py index 89fc3d5..25807c5 100755 --- a/bin/convertDCtoNumpy.py +++ b/bin/convertDCtoNumpy.py @@ -1,30 +1,31 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # encoding: utf-8 from argparse import ArgumentParser -from DeepJetCore.DataCollection import DataCollection parser = ArgumentParser('convert a data collection to a single set of numpy arrays. 
Warning, this can produce a large output') parser.add_argument('inputDataCollection') parser.add_argument('outputFilePrefix') +parser.add_argument("--nfiles", help="select number of files to be converted, default is every file", type=int, default=-1) args = parser.parse_args() + +from DeepJetCore.DataCollection import DataCollection + print('reading data collection') dc=DataCollection() dc.readFromFile(args.inputDataCollection) - +nfiles = args.nfiles print('producing feature array') -feat=dc.getAllFeatures() +feat=dc.getAllFeatures(nfiles=nfiles) print('producing truth array') -truth=dc.getAllLabels() +truth=dc.getAllLabels(nfiles=nfiles) print('producing weight array') -weight=dc.getAllWeights() +weight=dc.getAllWeights(nfiles=nfiles) -print('producing means and norms array') -means=dc.means from numpy import save @@ -38,4 +39,3 @@ for i in range(len(weight)): save(args.outputFilePrefix+'_weights_'+str(i) +'.npy', weight[i]) -save(args.outputFilePrefix+'_meansandnorms.npy', means) diff --git a/bin/convertFromRoot.py b/bin/convertFromRoot.py deleted file mode 100755 index 17f840d..0000000 --- a/bin/convertFromRoot.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -''' -convertFromRoot -- converts the root files produced with the deepJet ntupler to the data format used by keras for the DNN training - -convertFromRoot is a small program that converts the root files produced with the deepJet ntupler to the data format used by keras for the DNN training - - -@author: jkiesele - -''' - -import sys -import os - -from argparse import ArgumentParser -from pdb import set_trace -import logging -logging.getLogger().setLevel(logging.INFO) - -from DeepJetCore.DataCollection import DataCollection - -import imp -try: - imp.find_module('datastructures') - from datastructures import * -except ImportError: - print('datastructure modules not found. 
Please define a DeepJetCore submodule') - - -class_options=[] -import inspect, sys -for name, obj in inspect.getmembers(sys.modules['datastructures']): - if inspect.isclass(obj) and 'TrainData' in name: - class_options.append(obj) - -class_options = dict((str(i).split("'")[1].split('.')[-1], i) for i in class_options) - - -parser = ArgumentParser('program to convert root tuples to traindata format') -parser.add_argument("-i", help="set input sample description (output from the check.py script)", metavar="FILE") -parser.add_argument("--noRelativePaths", help="Assume input samples are absolute paths with respect to working directory", default=False, action="store_true") -parser.add_argument("-o", help="set output path", metavar="PATH") -parser.add_argument("-c", choices = class_options.keys(), help="set output class (options: %s)" % ', '.join(class_options.keys()), metavar="Class") -parser.add_argument("--classArgs", help="Arguments to pass to output class") -parser.add_argument("-r", help="set path to snapshot that got interrupted", metavar="FILE", default='') -parser.add_argument("-n", default='', help="(optional) number of child processes") -parser.add_argument("--testdatafor", default='') -parser.add_argument("--usemeansfrom", default='') -parser.add_argument("--nothreads", action='store_true') -parser.add_argument("--means", action='store_true', help='compute only means') -parser.add_argument("--batch", help='Provide a batch ID to be used') -parser.add_argument("-v", action='store_true', help='verbose') -parser.add_argument("-q", action='store_true', help='quiet') - -# process options -args=parser.parse_args() -infile=args.i -outPath=args.o -class_name=args.c -class_args=args.classArgs -recover=args.r -testdatafor=args.testdatafor -usemeansfrom=args.usemeansfrom -nchilds=args.n - -if args.batch and not (args.usemeansfrom or args.testdatafor): - raise ValueError( - 'When running in batch mode you should also ' - 'provide a means source through the --usemeansfrom option' - ) - -if args.v: - logging.getLogger().setLevel(logging.DEBUG) -elif args.q: - logging.getLogger().setLevel(logging.WARNING) - -if infile: - logging.info("infile = %s" % infile) -if outPath: - logging.info("outPath = %s" % outPath) - -# MAIN BODY # -dc = DataCollection(nprocs = (1 if args.nothreads else -1), - useRelativePaths=True if not args.noRelativePaths else False) -if len(nchilds): - dc.nprocs=int(nchilds) - -if class_name in class_options: - traind = class_options[class_name] -elif not recover and not testdatafor: - print('available classes:') - for key, val in class_options.iteritems(): - print(key) - raise Exception('wrong class selection') -if testdatafor: - logging.info('converting test data, no weights applied') - dc.createTestDataForDataCollection( - testdatafor, infile, outPath, - outname = args.batch if args.batch else 'dataCollection.dc', - batch_mode = bool(args.batch) - ) -elif recover: - dc.recoverCreateDataFromRootFromSnapshot(recover) -elif args.means: - dc.convertListOfRootFiles( - infile, traind(class_args) if class_args else traind(), outPath, - means_only=True, output_name='batch_template.dc' - ) -else: - dc.convertListOfRootFiles( - infile, traind(class_args) if class_args else traind(), outPath, - usemeansfrom, output_name = args.batch if args.batch else 'dataCollection.dc', - batch_mode = bool(args.batch) - ) - - - - diff --git a/bin/convertFromSource.py b/bin/convertFromSource.py new file mode 100755 index 0000000..cfb4fc4 --- /dev/null +++ b/bin/convertFromSource.py @@ -0,0 +1,145 @@ 
+#!/usr/bin/env python3 +# encoding: utf-8 +''' + +@author: jkiesele + +''' + +import sys +import os +import tempfile + +from argparse import ArgumentParser +from pdb import set_trace +import logging +logging.basicConfig(format='%(asctime)s:%(levelname)s:%(name)s: %(message)s') +logging.getLogger().setLevel(logging.INFO) + + +parser = ArgumentParser('program to convert source files to traindata format') +parser.add_argument("-i", help="input file list (required)", metavar="FILE", default='') +parser.add_argument("-o", help="set output path (required)", metavar="PATH", default='') +parser.add_argument("-c", help="set output class (required)", metavar="Class") +parser.add_argument("--gpu", help="enable GPU usage for conversion", action='store_true', default=False) +parser.add_argument("-r", help="set path to snapshot that got interrupted", metavar="FILE", default='') +parser.add_argument("--testdata", action='store_true', help='convert as test data') +parser.add_argument("-n", default='', help="(optional) number of child processes") +parser.add_argument("--nothreads", action='store_true', help='only spawn one process') +parser.add_argument("--checkFiles", action='store_true', help="enables file checking (requires fileIsValid function of TrainData to be defined)") +parser.add_argument("--noRelativePaths", help="Assume input samples are absolute paths with respect to working directory", default=False, action="store_true") +parser.add_argument("--useweightersfrom", default='', help='(for test data or batching) use weighter objects from a different data collection') + + +parser.add_argument("--inRange", nargs=2, type=int, help="(for batching) input line numbers") +parser.add_argument("--means", action='store_true', help='(for batching) compute only means') +parser.add_argument("--nforweighter", default='500000', help='set number of samples to be used for weighter object creation') +parser.add_argument("--batch", help='(for batching) provide a batch ID to be used') +parser.add_argument("--noramcopy", action='store_true', help='Do not copy input file to /dev/shm before conversion') +parser.add_argument("-v", action='store_true', help='verbose') +parser.add_argument("-q", action='store_true', help='quiet') + +# process options +args=parser.parse_args() + +#first GPU +if args.gpu: + import setGPU + +from DeepJetCore.DataCollection import DataCollection +from DeepJetCore.conversion.conversion import class_options + +infile=args.i +outPath=args.o +if (len(infile)<1 or len(outPath)<1) and not len(args.r): + parser.print_help() + exit() +class_name=args.c +recover=args.r +useweightersfrom=args.useweightersfrom +nchilds=args.n +dofilecheck=args.checkFiles +testdata = args.testdata + +if args.gpu: + if (len(nchilds) and int(nchilds)>1) and (not args.nothreads): + print("WARNING: enabling gpu for conversion and processing multiple files in parallel could be an issue!") + +#fileIsValid + +if args.batch: + raise ValueError('batching not implemented at the moment.') + +if args.v: + logging.getLogger().setLevel(logging.DEBUG) +elif args.q: + logging.getLogger().setLevel(logging.WARNING) + +if infile: + logging.info("infile = %s" % infile) +if outPath: + logging.info("outPath = %s" % outPath) + +if args.noRelativePaths: + relpath = '' +elif not recover: + relpath = os.path.dirname(os.path.realpath(infile)) + +if args.inRange is not None: + with tempfile.NamedTemporaryFile(delete=False, dir=os.getenv('TMPDIR', '/tmp')) as my_infile: + with open(infile) as source: + do_write = False + for iline, line in 
enumerate(source): + if iline == args.inRange[0]: + do_write = True + elif iline == args.inRange[1]: + break + if do_write: + path = os.path.realpath(os.path.join(relpath, line)) + my_infile.write(path) + + infile = my_infile.name + # new infile will always have absolute path + relpath = '' + +# MAIN BODY # +dc = DataCollection(nprocs = (1 if args.nothreads else -1)) +dc.meansnormslimit = int(args.nforweighter) +dc.no_copy_on_convert = args.noramcopy +dc.istestdata=testdata +if len(nchilds): + dc.nprocs=int(nchilds) +if args.batch is not None: + dc.batch_mode = True + +traind=None +if class_name in class_options: + traind = class_options[class_name] +elif not recover: + print('available classes:') + for key, val in class_options.items(): + print(key) + raise Exception('wrong class selection') + +if recover: + dc.recoverCreateDataFromRootFromSnapshot(recover) +elif args.means: + dc.convertListOfRootFiles( + infile, traind, outPath, + means_only=True, + output_name='batch_template.djcdc', + relpath=relpath, + checkfiles=dofilecheck + ) +else: + logging.info('Start conversion') + dc.convertListOfRootFiles( + infile, traind, outPath, + takeweightersfrom=useweightersfrom, + output_name=(args.batch if args.batch else 'dataCollection.djcdc'), + relpath=relpath, + checkfiles=dofilecheck + ) + +if args.inRange is not None: + os.unlink(infile) diff --git a/bin/convertPre2.0DCtoDC.py b/bin/convertPre2.0DCtoDC.py new file mode 100755 index 0000000..9b0c111 --- /dev/null +++ b/bin/convertPre2.0DCtoDC.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +''' + +@author: jkiesele + +''' + +from argparse import ArgumentParser + +parser = ArgumentParser('simple program to convert old datacollection format to the new one') +parser.add_argument("infile", help="input \"dc\" file") +parser.add_argument("-c", choices = class_options.keys(), help="set new output class (options: %s)" % ', '.join(class_options.keys()), metavar="Class") + +# process options +args=parser.parse_args() + + +import os +from multiprocessing import Pool +from DeepJetCore.TrainData_compat import TrainData as TDOld +from DeepJetCore.TrainData import TrainData + +from DeepJetCore.DataCollection_compat import DataCollection as DCOld +from DeepJetCore.DataCollection import DataCollection +from DeepJetCore.conversion.conversion import class_options + +infile=args.infile + +class_name = args.c + +if class_name in class_options: + traind = class_options[class_name] +else: + print('available classes:') + for key, val in class_options.iteritems(): + print(key) + raise Exception('wrong class selection') + +if not ".dc" in infile: + raise Exception('wrong input file '+infile) + +dir = os.path.dirname(infile) + +dcold = DCOld() +dcold.readRawFromFile(infile) + + +dcnew = DataCollection() +dcnew.dataclass = traind() +dcnew.samples = [s[:-4]+'djctd' for s in dcold.samples] +print(dcnew.samples) +dcnew.sourceList = dcold.originRoots +# leave traindata undefined no way to convert. 
+dcnew.__nsamples = 0 # determine again, also check + +outfile = infile[:-2] +'djcdc' +print("infile: ", infile, " outfile", outfile) + +def worker(i): + + td = TDOld() + tdnew = TrainData() + print("converting",dcold.samples[i]) + + td.readIn(dir + dcold.samples[i]) + x = td.x + y = td.y + w = td.w + + tdnew.tdnew._store(x,y,w) + tdnew.writeToFile(dcnew.samples[i]) + + td.clear() + tdnew.clear() + del x,y,w + return True + +p = Pool() +ret = p.map(worker, range(len(dcold.samples))) + +for r in ret: + if not r: + print('something went wrong ') + exit() + +dcnew.writeToFile(outfile) + + diff --git a/bin/convertPre2.0TDtoTD.py b/bin/convertPre2.0TDtoTD.py new file mode 100755 index 0000000..a0fd72d --- /dev/null +++ b/bin/convertPre2.0TDtoTD.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +''' + +@author: jkiesele + +''' + +from argparse import ArgumentParser +from DeepJetCore.conversion.conversion import class_options + +parser = ArgumentParser('simple program to convert old (pre 2.0) traindata format to the new one') +parser.add_argument("infile", help="input \"meta\" file") +parser.add_argument("-c", choices = class_options.keys(), help="set new output class (options: %s)" % ', '.join(class_options.keys()), metavar="Class") +# process options +args=parser.parse_args() +infile=args.infile +class_name = args.c + +if class_name in class_options: + traind = class_options[class_name] +else: + print('available classes:') + for key, val in class_options.iteritems(): + print(key) + raise Exception('wrong class selection') + + +if not ".meta" in infile: + print('wrong input file '+infile) + exit() + +from DeepJetCore.TrainData_compat import TrainData +td = TrainData() +td.readIn(infile) +x = td.x +y = td.y +w = td.w +outfile = infile[:-5] +print(outfile) + +from DeepJetCore.TrainData import TrainData + +tdnew = traind() +tdnew._store(x,y,w) +tdnew.writeToFile(outfile+".djctd") diff --git a/bin/convertToPB.py b/bin/convertToPB.py new file mode 100755 index 0000000..6d22040 --- /dev/null +++ b/bin/convertToPB.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + + +#script that takes model in .h5 format as input as spits out the graph format used in CMSSW + +import imp +try: + imp.find_module('setGPU') + import setGPU +except ImportError: + found = False + +import tensorflow as tf + +from DeepJetCore.DJCLosses import * +from DeepJetCore.DJCLayers import * +from argparse import ArgumentParser +from keras import backend as K + +from DeepJetCore.customObjects import get_custom_objects + +custom_objs = get_custom_objects() + +sess = tf.Session() +from keras.models import load_model +from argparse import ArgumentParser +from keras import backend as K +from Losses import * #needed! 
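Both legacy converters above are one-shot command line tools: convertPre2.0TDtoTD.py <sample>.meta -c <TrainDataClass> rewrites a single old-format sample as <sample>.djctd, while convertPre2.0DCtoDC.py <collection>.dc -c <TrainDataClass> converts every sample of a collection in parallel and writes the corresponding <collection>.djcdc next to the input.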
+import os + +K.set_session(sess) + +parser = ArgumentParser('') +parser.add_argument('inputModel') +parser.add_argument('outputDir') +args = parser.parse_args() + + +if os.path.isdir(args.outputDir): + raise Exception('output directory must not exists yet') + +model=load_model(args.inputModel, custom_objects=custom_objs) + +K.set_learning_phase(0) +inputs = [node.op.name for node in model.inputs] +print ("input layer names", inputs) +outputs = [node.op.name for node in model.outputs] +print ("output layer names",outputs) +constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), outputs) +tfoutpath=args.outputDir+'/tf' +import os +os.system('mkdir -p '+tfoutpath) +tf.train.write_graph(constant_graph, tfoutpath, "constant_graph.pb", as_text=False) + + diff --git a/bin/convertToTF.py b/bin/convertToTF.py index 8a22ce5..dfffd17 100755 --- a/bin/convertToTF.py +++ b/bin/convertToTF.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import imp try: diff --git a/bin/createDataCollectionFromTD.py b/bin/createDataCollectionFromTD.py new file mode 100755 index 0000000..bdbe3a0 --- /dev/null +++ b/bin/createDataCollectionFromTD.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + + +from DeepJetCore.DataCollection import DataCollection +from DeepJetCore.conversion.conversion import class_options +from DeepJetCore.TrainData import TrainData +from argparse import ArgumentParser +import os + +parser = ArgumentParser('program to wrap converted trainData files in a dataCollection and attach a python TrainData class description') +parser.add_argument("-c", choices = class_options.keys(), help="set output class (options: %s)" % ', '.join(class_options.keys()), metavar="Class") +parser.add_argument("-o", help="dataCollection output file name",default="") + +parser.add_argument('files', metavar='N',nargs='+', + help='djctd files to be merged in the DataCollection') + +args=parser.parse_args() + + +if len(args.files) < 1: + print('you must provide at least one input file') + exit() +if not len(args.o): + print('you must provide an output file name') + exit() + +indir = os.path.dirname(args.files[0]) +if len(indir): + indir+="/" +class_name = args.c + +if class_name in class_options: + traind = class_options[class_name] +else: + print('available classes:') + for key, val in class_options.items(): + print(key) + raise Exception('wrong class selection') + +dc = DataCollection() +dc.setDataClass(traind) + +for f in args.files: + dc.samples.append(os.path.basename(f)) + +outfile = args.o +if not outfile[-6:] == ".djcdc": + outfile+=".djcdc" +dc.writeToFile(indir+outfile) diff --git a/bin/createSubpackage.py b/bin/createSubpackage.py new file mode 100755 index 0000000..744f5d7 --- /dev/null +++ b/bin/createSubpackage.py @@ -0,0 +1,465 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + + +import sys +import os +from argparse import ArgumentParser + +parser = ArgumentParser('script to create a DeepJetCore subpackage') + +parser.add_argument("subpackage_directory", help="Directory to place the subpackage in (will be created). 
Last part will be the name of the subpackage") +parser.add_argument("--data", help="create example data", default=False, action="store_true") + +args=parser.parse_args() + +deepjetcore = os.getenv('DEEPJETCORE') + +subpackage_dir=args.subpackage_directory +subpackage_name = os.path.basename(os.path.normpath(subpackage_dir)) + +if len(subpackage_dir)<1: + raise Exception("The subpackage name is too short") + +### templates #### + + +environment_file=''' +#! /bin/bash + +export {subpackage}=$( cd "$( dirname "${BASH_SOURCE}" )" && pwd -P) +export DEEPJETCORE_SUBPACKAGE=${subpackage} + +cd ${subpackage} +export PYTHONPATH=${subpackage}/modules:$PYTHONPATH +export PYTHONPATH=${subpackage}/modules/datastructures:$PYTHONPATH +export PATH=${subpackage}/scripts:$PATH + +export LD_LIBRARY_PATH=${subpackage}/modules/compiled:$LD_LIBRARY_PATH +export PYTHONPATH=${subpackage}/modules/compiled:$PYTHONPATH + +'''.format(deepjetcore=deepjetcore, + subpackage=subpackage_name.upper(), + subpackage_dir=os.path.abspath(subpackage_dir), + BASH_SOURCE="{BASH_SOURCE[0]}") + +create_dir_structure_script=''' +#! /bin/bash +mkdir -p {subpackage_dir} +mkdir -p {subpackage_dir}/modules +mkdir -p {subpackage_dir}/modules/datastructures +mkdir -p {subpackage_dir}/scripts +mkdir -p {subpackage_dir}/Train +mkdir -p {subpackage_dir}/example_data +mkdir -p {subpackage_dir}/cpp_analysis/src +mkdir -p {subpackage_dir}/cpp_analysis/interface +mkdir -p {subpackage_dir}/cpp_analysis/bin +mkdir -p {subpackage_dir}/modules/compiled/src +mkdir -p {subpackage_dir}/modules/compiled/interface +'''.format(subpackage_dir=subpackage_dir) + +datastructure_template=''' + +from DeepJetCore.TrainData import TrainData, fileTimeOut +from DeepJetCore import SimpleArray +import numpy as np + +class TrainData_example(TrainData): + def __init__(self): + TrainData.__init__(self) + # no class member is mandatory + self.description = "This is a TrainData example file. Having a description string is not a bad idea (but not mandatory), e.g. for describing the array structure." + #define any other (configuration) members that seem useful + self.someusefulemember = "something you might need later" + + + #def createWeighterObjects(self, allsourcefiles): + # + # This function can be used to derive weights (or whatever quantity) + # based on the entire data sample. It should return a dictionary that will then + # be passed to either of the following functions. The weighter objects + # should be pickleable. + # In its default implementation, the dict is empty + # return {} + + + def convertFromSourceFile(self, filename, weighterobjects, istraining): + # This is the only really mandatory function (unless writeFromSourceFile is defined). + # It defines the conversion rule from an input source file to the lists of training + # arrays self.x, self.y, self.w + # self.x is a list of input feature arrays + # self.y is a list of truth arrays + # self.w is optional and can contain a weight array + # (needs to have same number of entries as truth array) + # If no weights are needed, this can be left completely empty + # + # The conversion should convert finally to numpy arrays. In the future, + # also tensorflow tensors will be supported. 
+ # + # In this example, differnt ways of reading files are deliberatly mixed + # + + + print('reading '+filename) + + import ROOT + fileTimeOut(filename,120) #give eos a minute to recover + rfile = ROOT.TFile(filename) + tree = rfile.Get("tree") + nsamples = tree.GetEntries() + + # user code, example works with the example 2D images in root format generated by make_example_data + from DeepJetCore.preprocessing import read2DArray + + feature_array = read2DArray(filename,"tree","image2d",nsamples,32,32) + + print('feature_array',feature_array.shape) + + + import uproot3 as uproot + + urfile = uproot.open(filename)["tree"] + truth = np.concatenate([np.expand_dims(urfile.array("isA"), axis=1) , + np.expand_dims(urfile.array("isB"), axis=1), + np.expand_dims(urfile.array("isC"), axis=1)],axis=1) + + truth = truth.astype(dtype='float32', order='C') #important, float32 and C-type! + + self.nsamples=len(feature_array) + + #returns a list of feature arrays, a list of truth arrays and a list of weight arrays + return [SimpleArray(feature_array,name="features0")], [SimpleArray(truth,name="truth0")], [] + + ## defines how to write out the prediction + def writeOutPrediction(self, predicted, features, truth, weights, outfilename, inputfile): + # predicted will be a list of numpy arrays + # save it as you like, the following way is not recommended as it is slow + # and not disk-space efficient. + # You can also use the fast and compressed TrainData format itself for saving + # or use uproot to write a tree to a TFile + + import pickle + with open(outfilename,'wb') as f: + pickle.dump(predicted,f) + +''' + + +training_template=''' + +#base class for tf based training +from DeepJetCore.training.training_base import training_base + +#tf.keras imports +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Dense, Conv2D, Flatten, BatchNormalization #etc + +#callbacks +from DeepJetCore.training.DeepJet_callbacks import simpleMetricsCallback + +def my_model(Inputs,otheroption): + + x = Inputs[0] #this is the self.x list from the TrainData data structure + x = BatchNormalization(momentum=0.9)(x) + x = Conv2D(8,(4,4),activation='relu', padding='same')(x) + x = Conv2D(8,(4,4),activation='relu', padding='same')(x) + x = Conv2D(8,(4,4),activation='relu', padding='same')(x) + x = BatchNormalization(momentum=0.9)(x) + x = Conv2D(8,(4,4),strides=(2,2),activation='relu', padding='valid')(x) + x = Conv2D(4,(4,4),strides=(2,2),activation='relu', padding='valid')(x) + x = Flatten()(x) + x = Dense(32, activation='relu')(x) + + # 3 prediction classes + x = Dense(3, activation='softmax')(x) + + predictions = [x] + return Model(inputs=Inputs, outputs=predictions) + + +train=training_base() + +if not train.modelSet(): # allows to resume a stopped/killed training. 
Only sets the model if it cannot be loaded from previous snapshot + + train.setModel(my_model,otheroption=1) + + train.compileModel(learningrate=0.0003, + loss='categorical_crossentropy') + +print(train.keras_model.summary()) + +callbacks = [ + simpleMetricsCallback( + # will be saved as interactive html plot + output_file=train.outputDir+'/metrics.html', + + # record all losses (val and train) + select_metrics='*loss*', + + # only call at the end of the epoch (when also val loss is available) + # can also be called after a certain amount of batches seen, please + # check the documentation of the callback + call_on_epoch=True + ) +] + +train.trainModel(nepochs=50, + batchsize=500, + checkperiod=10, # saves a checkpoint model every 10 epochs + + #register the additional callbacks + additional_callbacks=callbacks, + + #other keyward arguments are passed to tf.keras.Model.fit + verbose=1) + +print('Since the training is done, use the predict.py script to predict the model output on your test sample, e.g.: predict.py /KERAS_model.h5 /trainsamples.djcdc /example_data/test_data.txt ') +''' + +datastructures_init = ''' +#Make it look like a package +from glob import glob +from os import environ +from os.path import basename, dirname +from pdb import set_trace + +#gather all the files here +modules = [basename(i.replace('.py','')) for i in glob('%s/[A-Za-z]*.py' % dirname(__file__))] +__all__ = [] +structure_list=[] +for module_name in modules: + module = __import__(module_name, globals(), locals(), [module_name]) + for model_name in [i for i in dir(module) if 'TrainData' in i]: + + + model = getattr(module, model_name) + globals()[model_name] = model + locals( )[model_name] = model + __all__.append(model_name) + structure_list.append(model_name) + +''' + +layers_template=''' +# Define custom layers here and add them to the global_layers_list dict (important!) +global_layers_list = {} +''' +losses_template=''' +# Define custom losses here and add them to the global_loss_list dict (important!) +global_loss_list = {} +''' + +metrics_template=''' +# Define custom metrics here and add them to the global_metrics_list dict (important!) 
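The generated Losses.py, Layers.py and Metrics.py above are plain registries: whatever is added to their global dicts is presumably picked up as a Keras custom object by the training and conversion tools (compare get_custom_objects in convertToPB.py). A hypothetical entry for the generated modules/Losses.py, assuming a tf.keras-style loss signature as in the training template:

    # names are illustrative, not part of the generated template
    import tensorflow as tf

    def example_custom_xentropy(y_true, y_pred):
        # stand-in body: plain categorical cross entropy; replace with the real custom logic
        return tf.keras.losses.categorical_crossentropy(y_true, y_pred)

    global_loss_list = {}
    global_loss_list['example_custom_xentropy'] = example_custom_xentropy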
+global_metrics_list = {} +''' + +makefile_template=''' + +# +# This file might need some adjustments but should serve as a good basis +# + +PYTHON_INCLUDE = `python-config --includes` +PYTHON_LIB=`python-config --libs` + +ROOTSTUFF=`root-config --libs --glibs --ldflags` +ROOTCFLAGS=`root-config --cflags` + +CPP_FILES := $(wildcard src/*.cpp) +OBJ_FILES := $(addprefix obj/,$(notdir $(CPP_FILES:.cpp=.o))) +LD_FLAGS := `root-config --cflags --glibs` -lMathMore -L${DEEPJETCORE}/compiled -ldeepjetcorehelpers -lquicklz +CC_FLAGS := -fPIC -g -Wall `root-config --cflags` +CC_FLAGS += -I./interface -I${DEEPJETCORE}/compiled/interface +#CC_FLAGS += -MMD + + +all: $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp)) + + +%: bin/%.cpp $(OBJ_FILES) + g++ $(CC_FLAGS) $(LD_FLAGS) $(OBJ_FILES) $< -o $@ + + +obj/%.o: src/%.cpp + g++ $(CC_FLAGS) -c -o $@ $< + + +clean: + rm -f obj/*.o obj/*.d + rm -f % +''' + +bin_template=''' +#include "TString.h" +#include "friendTreeInjector.h" +#include + +int main(int argc, char* argv[]){ + if(argc<2) return -1; + + TString infile = argv[1]; + + friendTreeInjector intree; + intree.addFromFile(infile); + intree.setSourceTreeName("tree"); + + intree.createChain(); + + auto c = intree.getChain(); + + std::cout << c->GetEntries() < +#include "boost/python/numpy.hpp" +#include "boost/python/list.hpp" +#include "boost/python/str.hpp" +#include +#include + +//includes from deepjetcore +#include "helper.h" +#include "simpleArray.h" + +namespace p = boost::python; +namespace np = boost::python::numpy; + +/* + * Example of a python module that will be compiled. + * It can be used, e.g. to convert from fully custom input data + */ + +np::ndarray readFirstFeatures(std::string infile){ + + auto arr = djc::simpleArray({10,3,4}); + arr.at(0,2,1) = 5. 
;//filling some data + + return simpleArrayToNumpy(arr); +} + +BOOST_PYTHON_MODULE(c_convert) { + Py_Initialize(); + np::initialize(); + def("readFirstFeatures", &readFirstFeatures); +} + +''' + +module_makefile=''' + + +# +# This file might need some adjustments but should serve as a good basis +# + +PYTHON_INCLUDE = `python-config --includes` +PYTHON_LIB=`python-config --libs` + +ROOTSTUFF=`root-config --libs --glibs --ldflags` +ROOTCFLAGS=`root-config --cflags` + +CPP_FILES := $(wildcard src/*.cpp) +OBJ_FILES := $(addprefix obj/,$(notdir $(CPP_FILES:.cpp=.o))) +LD_FLAGS := `root-config --cflags --glibs` -lMathMore -L${DEEPJETCORE}/compiled -ldeepjetcorehelpers -lquicklz +CC_FLAGS := -fPIC -g -Wall `root-config --cflags` +CC_FLAGS += -I./interface -I${DEEPJETCORE}/compiled/interface +DJC_LIB = -L${DEEPJETCORE}/compiled -ldeepjetcorehelpers + + +MODULES := $(wildcard src/*.C) +MODULES_OBJ_FILES := $(addprefix ./,$(notdir $(MODULES:.C=.o))) +MODULES_SHARED_LIBS := $(addprefix ./,$(notdir $(MODULES:.C=.so))) + + +all: $(MODULES_SHARED_LIBS) $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp)) + +#compile the module helpers if necessary +#../modules/libsubpackagehelpers.so: +# cd ../modules; make; cd - + +%: bin/%.cpp $(OBJ_FILES) + g++ $(CC_FLAGS) $(LD_FLAGS) $(OBJ_FILES) $< -o $@ + + +obj/%.o: src/%.cpp + g++ $(CC_FLAGS) -c -o $@ $< + + +#python modules + +%.so: %.o + g++ -o $(@) -shared -fPIC $(LINUXADD) $< $(ROOTSTUFF) $(PYTHON_LIB) -lboost_python -lboost_numpy $(DJC_LIB) + +%.o: src/%.C + g++ $(ROOTCFLAGS) -O2 $(CC_FLAGS) -I./interface $(PYTHON_INCLUDE) -fPIC -c -o $(@) $< + + +clean: + rm -f obj/*.o obj/*.d *.so + rm -f % + +''' + +######## create the structure ######## + + +os.system(create_dir_structure_script) +with open(subpackage_dir+'/env.sh','w') as envfile: + envfile.write(environment_file) + +with open(subpackage_dir+'/modules/datastructures/TrainData_example.py','w') as lfile: + lfile.write(datastructure_template) + +with open(subpackage_dir+'/modules/datastructures/__init__.py','w') as lfile: + lfile.write(datastructures_init) + +with open(subpackage_dir+'/Train/training_example.py','w') as lfile: + lfile.write(training_template) + +with open(subpackage_dir+'/modules/Layers.py','w') as lfile: + lfile.write(layers_template) +with open(subpackage_dir+'/modules/Losses.py','w') as lfile: + lfile.write(losses_template) +with open(subpackage_dir+'/modules/Metrics.py','w') as lfile: + lfile.write(metrics_template) + +with open(subpackage_dir+'/cpp_analysis/Makefile','w') as lfile: + lfile.write(makefile_template) + +with open(subpackage_dir+'/cpp_analysis/bin/example.cpp','w') as lfile: + lfile.write(bin_template) + +with open(subpackage_dir+'/modules/compiled/Makefile','w') as lfile: + lfile.write(module_makefile) + +with open(subpackage_dir+'/modules/compiled/src/c_convert.C','w') as lfile: + lfile.write(compiled_module_template) + + +print('subpackage '+ subpackage_name + " created in "+subpackage_dir) +if args.data: + print('creating example data... 
(10 training files, 1 test file, 1000 events each)') + os.system('cd '+subpackage_dir+'/example_data; make_example_data 1000 10 1') + print('example data can be found in '+subpackage_dir+'/example_data.') + +print('Before using the subpackage, source the "env.sh" file in the subpackage directory (not in DeepJetCore).') +print('to convert to example TrainData format use:') +print('convertFromSource.py -i '+subpackage_dir+'/example_data/train_files.txt -o -c TrainData_example') + +print('\nAn example to run the training can be found in '+subpackage_dir+'/Train/training_example.py') +print('It can be run with: \npython3 '+subpackage_dir+'/Train/training_example.py /dataCollection.djcdc ') + + + + + + + diff --git a/bin/extractDjcCppInterface.py b/bin/extractDjcCppInterface.py new file mode 100755 index 0000000..7b3c221 --- /dev/null +++ b/bin/extractDjcCppInterface.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 + + +from argparse import ArgumentParser +import os + +parser = ArgumentParser('Extract the C++ interface for trainData etc to be used outside in a simple package') +parser.add_argument('outputDir') + + +args = parser.parse_args() + + +script = ''' +#!/bin/bash +mkdir -p {outdir} +mkdir -p {outdir}/interface +mkdir -p {outdir}/src +mkdir -p {outdir}/obj +cp $DEEPJETCORE/compiled/interface/version.h {outdir}/interface/ +cp $DEEPJETCORE/compiled/interface/IO.h {outdir}/interface/ +cp $DEEPJETCORE/compiled/interface/quicklz.h {outdir}/interface/ +cp $DEEPJETCORE/compiled/interface/quicklzWrapper.h {outdir}/interface/ +cp $DEEPJETCORE/compiled/interface/simpleArray.h {outdir}/interface/ +cp $DEEPJETCORE/compiled/interface/trainData.h {outdir}/interface/ +cp $DEEPJETCORE/compiled/src/quicklz.c {outdir}/src/ + +'''.format(outdir=args.outputDir) + +os.system(script) + +makefile = ''' + +ROOTLIBS=`root-config --libs --glibs --ldflags` +ROOTCFLAGS=`root-config --cflags` +CPP_FILES := $(wildcard src/*.cpp) +OBJ_FILES := $(addprefix obj/,$(notdir $(CPP_FILES:.cpp=.o))) + +BINS := $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp)) + + +all: $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp)) libquicklz.so libdeepjetcoredataformats.so + +#helpers +libquicklz.so: + gcc -shared -O2 -fPIC src/quicklz.c -o libquicklz.so + +obj/%.o: src/%.cpp + g++ $(CFLAGS) $(ROOTCFLAGS) -I./interface -O2 -fPIC -c -o $@ $< + +#pack helpers in lib +libdeepjetcoredataformats.so: $(OBJ_FILES) + g++ -o $@ -shared -fPIC -fPIC $(OBJ_FILES) $(ROOTLIBS) + + +%: bin/%.cpp libdeepjetcoredataformats.so libquicklz.cxx + g++ $(CFLAGS) -I./interface $< -L. 
-ldeepjetcoredataformats -lquicklz $(ROOTCFLAGS) $(ROOTLIBS) -o $@ + + +clean: + rm -f libdeepjetcoredataformats.so libquicklz.so + rm -f obj/*.o + +''' + + +with open(args.outputDir+'/Makefile','w') as lfile: + lfile.write(makefile) + diff --git a/bin/mergeDataCollections.py b/bin/mergeDataCollections.py new file mode 100755 index 0000000..5e98bcc --- /dev/null +++ b/bin/mergeDataCollections.py @@ -0,0 +1,71 @@ +#!/bin/env python3 + +from argparse import ArgumentParser +from DeepJetCore.DataCollection import DataCollection +import os + +def sumDCandWrite(filelist, outname): + alldc=[] + for f in filelist: + try: + dc = DataCollection(f) + except: + print('read in of '+f +' not working, skip') + continue + alldc.append(dc) + rel = os.path.relpath(dc.dataDir,os.getcwd()) + dc.prependToSampleFiles(rel+'/') + dc.dataDir=os.getcwd() + + merged = sum(alldc) + print(outname) + merged.writeToFile(outname) + +parser = ArgumentParser('program to merge dataCollection files') +parser.add_argument('inputfiles', metavar='N', type=str, nargs='+',help='input data collection files (.dc)') +parser.add_argument("--testsplit", help="The fraction used to create a testing dataset", default=0, type=float) +parser.add_argument("--outputprefix", help="prefix to be used for output", default="merged", type=str) + +args = parser.parse_args() + +outprefix = args.outputprefix +if len(outprefix) and outprefix[-1] != '_': + outprefix+='_' + +if args.testsplit > 1 or args.testsplit < 0: + print('testsplit must not be larger than 1 or smaller than 0, abort') + exit(-1) + +#DEBUG +ninput = float(len(args.inputfiles)) + +if args.testsplit > 0 and ninput*(args.testsplit) < 1: + print('testsplit too small to produce a single test file, abort') + exit(-2) + + +trainfiles = [] +testfiles = [] + +if args.testsplit == 0: + trainfiles = args.inputfiles +else: + + for i in range(len(args.inputfiles)): + if i < ninput*(1.-args.testsplit): + trainfiles.append(args.inputfiles[i]) + else: + testfiles.append(args.inputfiles[i]) + + +print(trainfiles) +print(testfiles) + +if args.testsplit > 0: + sumDCandWrite(testfiles, outprefix+'test.dc') +sumDCandWrite(trainfiles, outprefix+'train.dc') + + + + + diff --git a/bin/mergeOrSplitFiles.py b/bin/mergeOrSplitFiles.py new file mode 100755 index 0000000..074f0ed --- /dev/null +++ b/bin/mergeOrSplitFiles.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +''' + +@author: jkiesele + +''' + + +from argparse import ArgumentParser +parser = ArgumentParser('merge or split files belonging to a dataCollection differently. The output will be written to the current working directory!') +parser.add_argument("infile", help="input \"dc\" file") +parser.add_argument("nelementsperfile", help="number of entries per file (output), for ragged, maximum number of elements") +parser.add_argument("--randomise", help="randomise order, could be helpful if difference samples need to be mixed", action='store_true') +args=parser.parse_args() + + +from DeepJetCore.DataCollection import DataCollection +from DeepJetCore.dataPipeline import TrainDataGenerator + +infile=args.infile +nbatch=int(args.nelementsperfile) +randomise = args.randomise + +dc = DataCollection(infile) +dc2 = DataCollection(infile) +samples = dc.samples + +dir = dc.dataDir +if len(dir)<1: + dir='.' 
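+# build the full paths to the input sample files; the generator below re-batches
+# them into new .djctd files and writes an updated dataCollection pointing to them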
+insamples = [dir+'/'+s for s in samples] + +gen = TrainDataGenerator() +gen.setBatchSize(nbatch) +gen.setSkipTooLargeBatches(False) +gen.setFileList(insamples) + +if randomise: + gen.shuffleFileList() + +nbatches = gen.getNBatches() + +newsamples=[] +for i in range(nbatches): + newname = str(samples[0][:-6]+"_n_"+str(i)+".djctd") + newsamples.append(newname) + ntd = gen.getBatch() + print(newname) + ntd.writeToFile(newname) + print('..written') + +dc2.samples = newsamples +dc2.writeToFile(infile[:-5]+"_n.djcdc") \ No newline at end of file diff --git a/bin/plotLoss.py b/bin/plotLoss.py index 1354351..b4f38b0 100755 --- a/bin/plotLoss.py +++ b/bin/plotLoss.py @@ -1,6 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -from DeepJetCore.evaluation import plotLoss from argparse import ArgumentParser @@ -11,6 +10,9 @@ args = parser.parse_args() +from DeepJetCore.evaluation import plotLoss + + infilename=args.inputDir+'/'+args.file diff --git a/bin/predict.py b/bin/predict.py index 4089aba..2a12c79 100755 --- a/bin/predict.py +++ b/bin/predict.py @@ -1,82 +1,133 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -import imp -try: - imp.find_module('setGPU') - import setGPU -except ImportError: - found = False - -from keras.models import load_model -from DeepJetCore.evaluation import testDescriptor from argparse import ArgumentParser -from keras import backend as K -import imp -try: - imp.find_module('Losses') - from Losses import * -except ImportError: - print 'No Losses module found, ignoring at your own risk' - global_loss_list = {} - -try: - imp.find_module('Layers') - from Layers import * -except ImportError: - print 'No Layers module found, ignoring at your own risk' - global_layers_list = {} - -try: - imp.find_module('Metrics') - from Metrics import * -except ImportError: - print 'No metrics module found, ignoring at your own risk' - global_metrics_list = {} - -import os -parser = ArgumentParser('Apply a model to a (test) sample and create friend trees to inject it inthe original ntuple') +parser = ArgumentParser('Apply a model to a (test) source sample.') parser.add_argument('inputModel') -parser.add_argument('inputDataCollection') -parser.add_argument('outputDir') -parser.add_argument('--use', help='coma-separated list of prediction indexes to be used') -parser.add_argument('--labels', action='store_true', help='store true labels in the trees') -parser.add_argument('--monkey_class', default='', help='allows to read the data with a different TrainData, it is actually quite dangerous if you do not know what you are doing') -parser.add_argument('--numpy', help='switches on numpy rec-array output in addition to root files. Will produce ONE large file (can become big)', action='store_true' , default=False ) - -args = parser.parse_args() +parser.add_argument('trainingDataCollection', help="the training data collection. 
Used to infer data format and batch size.") +parser.add_argument('inputSourceFileList', help="can be text file or a DataCollection file in the same directory as the sample files, or just a single traindata file.") +parser.add_argument('outputDir', help="will be created if it doesn't exist.") +parser.add_argument("-b", help="batch size, overrides the batch size from the training data collection.",default="-1") +parser.add_argument("--gpu", help="select specific GPU", metavar="OPT", default="") +parser.add_argument("--unbuffered", help="do not read input in memory buffered mode (for lower memory consumption on fast disks)", default=False, action="store_true") +parser.add_argument("--pad_rowsplits", help="pad the row splits if the input is ragged", default=False, action="store_true") - -if os.path.isdir(args.outputDir): - raise Exception('output directory must not exists yet') - -custom_objs = {} -custom_objs.update(global_loss_list) -custom_objs.update(global_layers_list) -custom_objs.update(global_metrics_list) -model=load_model(args.inputModel, custom_objects=custom_objs) +args = parser.parse_args() +batchsize = int(args.b) -td=testDescriptor(addnumpyoutput = args.numpy) -if args.use: - td.use_only = [int(i) for i in args.use.split(',')] +import imp from DeepJetCore.DataCollection import DataCollection +from DeepJetCore.dataPipeline import TrainDataGenerator +import tempfile +import atexit +import os +from keras.models import load_model +from keras import backend as K +from DeepJetCore.customObjects import get_custom_objects +from DeepJetCore.training.gpuTools import DJCSetGPUs + +inputdatafiles=[] +inputdir=None + +## prepare input lists for different file formats + +if args.inputSourceFileList[-6:] == ".djcdc": + print('reading from data collection',args.inputSourceFileList) + predsamples = DataCollection(args.inputSourceFileList) + inputdir = predsamples.dataDir + for s in predsamples.samples: + inputdatafiles.append(s) + +elif args.inputSourceFileList[-6:] == ".djctd": + inputdir = os.path.abspath(os.path.dirname(args.inputSourceFileList)) + infile = os.path.basename(args.inputSourceFileList) + inputdatafiles.append(infile) +else: + print('reading from text file',args.inputSourceFileList) + inputdir = os.path.abspath(os.path.dirname(args.inputSourceFileList)) + with open(args.inputSourceFileList, "r") as f: + for s in f: + inputdatafiles.append(s.replace('\n', '').replace(" ","")) + + +DJCSetGPUs(args.gpu) + +custom_objs = get_custom_objects() -testd=DataCollection() -testd.readFromFile(args.inputDataCollection) - +model=load_model(args.inputModel, custom_objects=custom_objs) +dc = None +if args.inputSourceFileList[-6:] == ".djcdc" and not args.trainingDataCollection[-6:] == ".djcdc": + dc = DataCollection(args.inputSourceFileList) + if batchsize < 1: + batchsize = 1 + print('No training data collection given. 
Using batch size of',batchsize) +else: + dc = DataCollection(args.trainingDataCollection) -os.mkdir(args.outputDir) +outputs = [] +os.system('mkdir -p '+args.outputDir) -td.makePrediction( - model, testd, args.outputDir, - store_labels = args.labels, - monkey_class = args.monkey_class -) -td.writeToTextFile(args.outputDir+'/tree_association.txt') +for inputfile in inputdatafiles: + + print('predicting ',inputdir+"/"+inputfile) + + use_inputdir = inputdir + if inputfile[0] == "/": + use_inputdir="" + outfilename = "pred_"+os.path.basename( inputfile ) + + td = dc.dataclass() + + if inputfile[-5:] == 'djctd': + if args.unbuffered: + td.readFromFile(use_inputdir+"/"+inputfile) + else: + td.readFromFileBuffered(use_inputdir+"/"+inputfile) + else: + print('converting '+inputfile) + td.readFromSourceFile(use_inputdir+"/"+inputfile, dc.weighterobjects, istraining=False) + -# make the file reading entirely C++ -# then it can be used for other studies + gen = TrainDataGenerator() + if batchsize < 1: + batchsize = dc.getBatchSize() + print('batch size',batchsize) + gen.setBatchSize(batchsize) + gen.setSquaredElementsLimit(dc.batch_uses_sum_of_squares) + gen.setSkipTooLargeBatches(False) + gen.setBuffer(td) + + predicted = model.predict_generator(gen.feedNumpyData(), + steps=gen.getNBatches(), + max_queue_size=1, + use_multiprocessing=False,verbose=1) + + + x = td.transferFeatureListToNumpy(args.pad_rowsplits) + w = td.transferWeightListToNumpy(args.pad_rowsplits) + y = td.transferTruthListToNumpy(args.pad_rowsplits) + + td.clear() + gen.clear() + + if not type(predicted) == list: #circumvent that keras return only an array if there is just one list item + predicted = [predicted] + overwrite_outname = td.writeOutPrediction(predicted, x, y, w, args.outputDir + "/" + outfilename, use_inputdir+"/"+inputfile) + if overwrite_outname is not None: + outfilename = overwrite_outname + outputs.append(outfilename) + + + +with open(args.outputDir + "/outfiles.txt","w") as f: + for l in outputs: + f.write(l+'\n') + + + + + diff --git a/bin/prepare_for_deployment.py b/bin/prepare_for_deployment.py new file mode 100755 index 0000000..253a1d5 --- /dev/null +++ b/bin/prepare_for_deployment.py @@ -0,0 +1,56 @@ +#! /bin/env python3 + +from argparse import ArgumentParser + +parser = ArgumentParser() +parser.add_argument('model') +parser.add_argument('output', help='output files name') +#parser.add_argument("", action='store_true') +parser.add_argument("--batch", type=int, default=1, help='batch size to be embedded in deployment') +args = parser.parse_args() + + +from keras.models import load_model +from keras import backend as K +from tensorflow.python.tools import optimize_for_inference_lib +from DeepJetCore.customObjects import get_custom_objects + +custom_objs = get_custom_objects() + +import tensorflow as tf +sess = tf.Session() +K.set_session(sess) + + + +K.set_learning_phase(False) #FUNDAMENTAL! this MUST be before loading the model! 
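+# NOTE: format_name() is used below but is not defined or imported in this file as
+# shown; a minimal sketch is added here, assuming it only strips the ':0' tensor
+# suffix from a Keras tensor name to obtain the plain graph node name.
+def format_name(tensor_name):
+    return tensor_name.split(':')[0]
+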
+model=load_model(args.model, custom_objects=custom_objs) + +output_names = [format_name(i.name) for i in model.outputs] +constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), output_names) + +tf.train.write_graph( + constant_graph, "", + args.output if args.output.endswith('.pb') else '%s.pb' % args.output, + as_text=False + ) + +txt_config = args.output.replace('.pb', '.config.pbtxt') \ + if args.output.endswith('.pb') else '%s.config.pbtxt' % args.output +with open(txt_config, 'w') as config: + for feed in model.inputs: + #the first element is always the batch size (None in the graph, needs to be defined here) + shape = [args.batch] + [int(i) for i in feed.shape[1:]] + shape = [' dim { size : %s }' % i for i in shape] + shape = '\n'.join(shape) + config.write('''feed { + id { node_name: "%s" } + shape { +%s + } +} +''' % (format_name(feed.name), shape)) + + config.write('\n') + for fetch in output_names: + config.write('fetch {\n id { node_name: "%s" }\n}\n' % fetch) diff --git a/bin/validateData.py b/bin/validateData.py new file mode 100755 index 0000000..a2fcbc6 --- /dev/null +++ b/bin/validateData.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 + + + +from argparse import ArgumentParser +parser = ArgumentParser('Check if all files in a dataset (datacollection) are ok or remove a specific entry\n') +parser.add_argument('inputDataCollection') +parser.add_argument('--remove',default="") +parser.add_argument('--skip_first',default=0) +args=parser.parse_args() + +from DeepJetCore.DataCollection import DataCollection + +dc=DataCollection(args.inputDataCollection) +dc.writeToFile(args.inputDataCollection+".backup") + +if not len(args.remove): + dc.validate(remove=True, skip_first=int(args.skip_first)) +else: + dc.removeEntry(args.remove) + print('total size after: '+str(dc.nsamples)) + +dc.writeToFile(args.inputDataCollection) \ No newline at end of file diff --git a/bin/validateFiles.py b/bin/validateFiles.py new file mode 100755 index 0000000..80882fc --- /dev/null +++ b/bin/validateFiles.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + + +from argparse import ArgumentParser +import os +from DeepJetCore.conversion.conversion import class_options +import tqdm + +parser = ArgumentParser('Check if all files in a file list and remove broken entries\n') +parser.add_argument('inputFileList') +parser.add_argument("-c", choices = class_options.keys(), help="set output class (required, options: %s)" % ', '.join(class_options.keys()), metavar="Class") + +args=parser.parse_args() + +class_name=args.c + +traind = None +if class_name in class_options: + traind = class_options[class_name]() +else: + print('available classes:') + for key, val in class_options.items(): + print(key) + exit() + +infiles = [] +inputdir = os.path.abspath(os.path.dirname(args.inputFileList)) +if len(inputdir): + inputdir+="/" + +with open(args.inputFileList, "r") as f: + for s in f: + if len(s): + infiles.append(s[:-1])#remove '\n' + +os.system("cp -f "+args.inputFileList+" "+args.inputFileList+".backup") + +removedfiles=[] +with open(args.inputFileList, "w") as f: + for s in tqdm.tqdm(infiles): + if traind.fileIsValid(inputdir+s): + f.write(s+'\n') + else: + removedfiles.append(inputdir+s) + +print('files removed',removedfiles) \ No newline at end of file diff --git a/compiled/.gitignore b/compiled/.gitignore index ced634c..3f9f8af 100644 --- a/compiled/.gitignore +++ b/compiled/.gitignore @@ -1,3 +1,7 @@ *.so *.o obj/*.o +classdict.cxx +classdict.rootmap +classdict_rdict.pcm + diff --git 
a/compiled/Makefile b/compiled/Makefile index 2365cd5..5e457a3 100644 --- a/compiled/Makefile +++ b/compiled/Makefile @@ -1,55 +1,56 @@ -# location of the Python header files - -PYTHON_VERSION = 2.7 -PYTHON_INCLUDE = ${CONDA_PREFIX}/include/python2.7 - + # location of the Boost Python include files and library - -# also works on gpu in compiled version -# this is just luck, ... -BOOST_INC = ${CONDA_PREFIX}/include -BOOST_LIB = ${CONDA_PREFIX}/lib -LINUXADD=-Wl,--export-dynamic -ROOTSTUFF=`root-config --cflags --libs --glibs` -g -CFLAGS= +PYTHON_INCLUDE = `python3-config --includes` +PYTHON_LIBS=`python3-config --ldflags` + +#PYTHONLIB=python3.6m +LINUXADD= #-Wl --export-dynamic +ROOTSTUFF=`root-config --libs --glibs --ldflags` +ROOTCFLAGS=`root-config --cflags` CPP_FILES := $(wildcard src/*.cpp) OBJ_FILES := $(addprefix obj/,$(notdir $(CPP_FILES:.cpp=.o))) +BINS := $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp)) + MODULES := $(wildcard src/*.C) MODULES_OBJ_FILES := $(addprefix ./,$(notdir $(MODULES:.C=.o))) MODULES_SHARED_LIBS := $(addprefix ./,$(notdir $(MODULES:.C=.so))) -UNAME_S := $(shell uname -s) -# remove linux flags in osx -ifeq ($(UNAME_S),Darwin) - LINUXADD="" -endif +# root needs this +CFLAGS := -std=c++1z + +all: $(MODULES_SHARED_LIBS) $(patsubst to_bin/%.cpp, %, $(wildcard to_bin/*.cpp)) classdict.so + +classdict.cxx: src/LinkDef.h + rootcling -v4 -f $@ -rmf classdict.rootmap -rml classdict.so $^ > /dev/null 2>&1 -all: $(MODULES_SHARED_LIBS) +classdict.so: classdict.cxx + g++ $(CFLAGS) $(ROOTCFLAGS) $(ROOTSTUFF) -fPIC -shared -o classdict.so classdict.cxx + + +%: to_bin/%.cpp libdeepjetcorehelpers.so classdict.cxx + g++ $(CFLAGS) -I./interface $(LINUXADD) $(PYTHON_INCLUDE) $< -L. -ldeepjetcorehelpers -lquicklz $(PYTHON_LIBS) -lboost_python3 -lboost_numpy3 $(ROOTCFLAGS) $(ROOTSTUFF) -o $@ + mv $@ ../bin/ #helpers libquicklz.so: gcc -shared -O2 -fPIC src/quicklz.c -o libquicklz.so obj/%.o: src/%.cpp - g++ $(CFLAGS) $(ROOTSTUFF) -I./interface -O2 -fPIC -c -o $@ $< - + g++ $(CFLAGS) $(PYTHON_INCLUDE) $(ROOTCFLAGS) -I./interface -O2 -fPIC -c -o $@ $< + #pack helpers in lib -libdeepjetcorehelpers.so: $(OBJ_FILES) - g++ -shared $(LINUXADD) $(ROOTSTUFF) obj/*.o -o $@ - +libdeepjetcorehelpers.so: $(OBJ_FILES) + g++ -o $@ -shared -fPIC $(LINUXADD) $(CFLAGS) -fPIC obj/*.o $(ROOTSTUFF) $(PYTHON_LIBS) -lboost_python3 -lboost_numpy3 %.so: %.o libdeepjetcorehelpers.so libquicklz.so - g++ -shared $(LINUXADD) $(ROOTSTUFF) -lquicklz -L./ -ldeepjetcorehelpers -L$(BOOST_LIB) -lboost_python -L${CONDA_PREFIX}/lib/python$(PYTHON_VERSION)/config -lpython2.7 $< -o $(@) - + g++ -o $(@) -shared -g -fPIC $(CFLAGS) $(LINUXADD) $< $(ROOTSTUFF) -L./ -lquicklz $(PYTHON_LIBS) -lboost_python3 -lboost_numpy3 -L./ -ldeepjetcorehelpers %.o: src/%.C - g++ $(ROOTSTUFF) -O2 -I./interface -I$(PYTHON_INCLUDE) -I$(BOOST_INC) -fPIC -c -o $(@) $< + g++ $(ROOTCFLAGS) -O2 -g -I./interface $(PYTHON_INCLUDE) -fPIC $(CFLAGS) -c -o $(@) $< + clean: - rm -f $(OBJ_FILES) $(SHARED_LIBS) $(MODULES_SHARED_LIBS) $(MODULES_OBJ_FILES) libdeepjetcorehelpers.so libquicklz.so - - - + rm -f $(OBJ_FILES) $(SHARED_LIBS) $(MODULES_SHARED_LIBS) $(MODULES_OBJ_FILES) libdeepjetcorehelpers.so libquicklz.so classdict.so classdict.cxx classdict.rootmap classdict_rdict.pcm diff --git a/compiled/Makefile_conda b/compiled/Makefile_conda new file mode 100644 index 0000000..34ef3d1 --- /dev/null +++ b/compiled/Makefile_conda @@ -0,0 +1,57 @@ + +# location of the Boost Python include files and library +PYTHON_INCLUDE = `python3-config --includes` 
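+# (the backquoted python3-config/root-config calls are expanded by the shell each
+#  time a compile or link recipe runs, not by make itself)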
+PYTHON_LIB=`python3-config --libs` + +# as 36, 37 or 38 to get the right lboost +PYTHON_VERSION=$(shell python -c 'import sys; print("%d%d"% sys.version_info[0:2])') + + +LINUXADD= #-Wl --export-dynamic +ROOTSTUFF=`root-config --libs --glibs --ldflags` +ROOTCFLAGS=`root-config --cflags` +CPP_FILES := $(wildcard src/*.cpp) +OBJ_FILES := $(addprefix obj/,$(notdir $(CPP_FILES:.cpp=.o))) + +BINS := $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp)) + +MODULES := $(wildcard src/*.C) +MODULES_OBJ_FILES := $(addprefix ./,$(notdir $(MODULES:.C=.o))) +MODULES_SHARED_LIBS := $(addprefix ./,$(notdir $(MODULES:.C=.so))) + +#CFLAGS := -g + +all: $(MODULES_SHARED_LIBS) $(patsubst to_bin/%.cpp, %, $(wildcard to_bin/*.cpp)) classdict.so + +classdict.cxx: src/LinkDef.h + rootcling -v4 -f $@ -rmf classdict.rootmap -rml classdict.so $^ + +classdict.so: classdict.cxx + g++ $(ROOTCFLAGS) $(ROOTSTUFF) -std=c++17 -fPIC -shared -o classdict.so classdict.cxx + + +%: to_bin/%.cpp libdeepjetcorehelpers.so classdict.cxx + g++ $(CFLAGS) -I./interface $(LINUXADD) $(PYTHON_INCLUDE) $< -L. -ldeepjetcorehelpers -lquicklz $(PYTHON_LIB) -lboost_python$(PYTHON_VERSION) -lboost_numpy$(PYTHON_VERSION) $(ROOTCFLAGS) $(ROOTSTUFF) -o $@ + mv $@ ../bin/ + +#helpers +libquicklz.so: + gcc -shared -O2 -fPIC src/quicklz.c -o libquicklz.so + +obj/%.o: src/%.cpp + g++ $(CFLAGS) $(PYTHON_INCLUDE) $(ROOTCFLAGS) -I./interface -O2 -fPIC -c -o $@ $< + +#pack helpers in lib +libdeepjetcorehelpers.so: $(OBJ_FILES) + g++ -o $@ -shared -fPIC $(LINUXADD) $(CFLAGS) -fPIC obj/*.o $(ROOTSTUFF) $(PYTHON_LIB) -lboost_python$(PYTHON_VERSION) -lboost_numpy$(PYTHON_VERSION) + +%.so: %.o libdeepjetcorehelpers.so libquicklz.so + g++ -o $(@) -shared -g -fPIC $(CFLAGS) $(LINUXADD) $< $(ROOTSTUFF) -L./ -lquicklz $(PYTHON_LIB) -lboost_python$(PYTHON_VERSION) -lboost_numpy$(PYTHON_VERSION) -L./ -ldeepjetcorehelpers + +%.o: src/%.C + g++ $(ROOTCFLAGS) -O2 -g -I./interface $(PYTHON_INCLUDE) -fPIC $(CFLAGS) -c -o $(@) $< + + + +clean: + rm -f $(OBJ_FILES) $(SHARED_LIBS) $(MODULES_SHARED_LIBS) $(MODULES_OBJ_FILES) libdeepjetcorehelpers.so libquicklz.so classdict.so classdict.cxx classdict.rootmap classdict_rdict.pcm diff --git a/compiled/interface/IO.h b/compiled/interface/IO.h new file mode 100644 index 0000000..0a64094 --- /dev/null +++ b/compiled/interface/IO.h @@ -0,0 +1,108 @@ +/* + * IO.h + * + * Created on: 7 Nov 2019 + * Author: jkiesele + */ + +#ifndef DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_IO_H_ +#define DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_IO_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +/* + * Very simple template wrapper around fread and fwrite with error checks + * The number of datatypes written is NOT given in bytes. + * Only works for types with valid sizeof(type). 
+ * Otherwise specify number of bytes + */ + +namespace djc{ +namespace io{ + +inline bool fileExists(const std::string& name) { + struct stat buffer; + return (stat (name.c_str(), &buffer) == 0); +} + +//only linux +inline std::string followFileName(FILE * ofile){ + char proclnk[0xFFF]; + char filename[0xFFF]; + sprintf(proclnk, "/proc/self/fd/%d", fileno(ofile)); + int r = readlink(proclnk, filename, 0xFFF); + std::string fname="uknown"; + if(r>0){ + if(r>=0xFFF-1) + r = 0xFFF-1; + filename[r]='\0'; + fname=filename; + } + return fname; +} + +template +void writeToFile(const T * p, FILE * ofile, size_t N=1, size_t Nbytes=0){ + if(!Nbytes){ + Nbytes = N*sizeof(T); + } + size_t ret = fwrite(p, 1, Nbytes, ofile); + if(ret != Nbytes){ + std::string fname = followFileName(ofile); + fclose(ofile); + throw std::runtime_error("djc::io::writeToFile: writing to file "+fname+" not successful"); + } +} + +template <> +void writeToFile(const std::string * p, FILE * ofile, size_t N, size_t Nbytes); + + +template +void writeToFile(const std::vector * p, FILE * ofile, size_t N=1, size_t Nbytes=0){ + N = p->size(); + writeToFile(&N,ofile); + for(const auto& v:*p) + writeToFile(&v,ofile); +} + + + +template +void readFromFile(T * p, FILE* ifile, size_t N=1, size_t Nbytes=0){ + if(!Nbytes) + Nbytes = N* sizeof(T); + size_t ret = fread(p, 1, Nbytes, ifile); + if(ret != Nbytes){ + std::string fname = followFileName(ifile); + fclose(ifile); + throw std::runtime_error("djc::io::readFromFile:reading from file "+fname+" not successful"); + } +} + +template <> +void readFromFile(std::string * p, FILE* ifile, size_t N, size_t Nbytes); + + +template +void readFromFile(std::vector * p, FILE* ifile, size_t N=1, size_t Nbytes=0){ + readFromFile(&N,ifile); + if(!N){ + p->resize(0); + return; + } + p->resize(N); + for(auto& v: *p) + readFromFile(&v,ifile); +} + +} +} + +#endif /* DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_IO_H_ */ diff --git a/compiled/interface/c_helper.h b/compiled/interface/c_helper.h new file mode 100644 index 0000000..e4bdb64 --- /dev/null +++ b/compiled/interface/c_helper.h @@ -0,0 +1,64 @@ +/* + * helper.h + * + * Created on: 8 Apr 2017 + * Author: jkiesele + */ + +#ifndef DEEPJET_MODULES_INTERFACE_CHELPER_H_ +#define DEEPJET_MODULES_INTERFACE_CHELPER_H_ + + +#include +#include +#include "TString.h" +#include "TObject.h" +#include "TString.h" +#include +#include +#include + +TString prependXRootD(const TString& path); + +bool isApprox(const float& a , const float& b, float eps=0.001); + +float deltaPhi(const float& phi1, const float& phi2); + +void checkTObject(const TObject * o, TString msg); + +template +T* getLineDouble(const T * h); + +template +std::string to_str(const T& t){ + std::stringstream ss; + ss << t; + return ss.str(); +} + +template +std::string to_str(const std::vector& t){ + std::stringstream ss; + ss << "["; + for(const auto& v:t) + ss << " " << to_str(v); + ss << " ]"; + return ss.str(); +} + + + +template +T* getLineDouble(const T * h){ + T* h2 = (T*)h->Clone(h->GetName()+(TString)"dline"); + h2->SetLineWidth(h->GetLineWidth()+1); + h2->SetLineColor(kBlack); + h2->SetLineColorAlpha(kBlack,0.8); + return h2; +} + + + + + +#endif /* DEEPJET_MODULES_INTERFACE_HELPER_H_ */ diff --git a/compiled/interface/colorToTColor.h b/compiled/interface/colorToTColor.h index 010101d..f195b74 100644 --- a/compiled/interface/colorToTColor.h +++ b/compiled/interface/colorToTColor.h @@ -45,8 +45,13 @@ int lineToTLineStyle(const TString& str){ int colorToTColor(const TString& str){ TString copstr=str; 
- copstr.ToLower(); + TColor col; + if(copstr.BeginsWith("#")){ + copstr.Resize(7); + return col.GetColor(copstr); + } + copstr.ToLower(); if(copstr.Contains("black")) return kBlack; @@ -93,7 +98,8 @@ int colorToTColor(const TString& str){ * green\n\ * darkgreen\n\ * purple\n\ - * darkpurple"< +#include "boost/python/numpy.hpp" + #include #include #include "TString.h" +#include "TObject.h" +#include "TString.h" +#include +#include +#include "c_helper.h" + +/** + * transfers ownership of the data to numpy array if no copy. + * size given it nobjects, not in bytes + */ +template +boost::python::numpy::ndarray STLToNumpy(const T * data, const std::vector& shape, const size_t& size, bool copy=true); + + + +//////// template implementations + +namespace _hidden{ +inline void destroyManagerCObject(PyObject* self) { + auto * b = reinterpret_cast( PyCapsule_GetPointer(self, NULL) ); + delete [] b; +} +} + + +template +boost::python::numpy::ndarray STLToNumpy(const T * data, const std::vector& shape, const size_t& size, bool copy){ + + namespace p = boost::python; + namespace np = boost::python::numpy; + + if(size>0){ + p::list pshape; + size_t sizecheck = 1; + for(size_t i=0;i h_capsule{capsule}; + boost::python::object owner_capsule{h_capsule}; + + np::ndarray dataarr = np::from_data((void*)data_ptr, + np::dtype::get_builtin(), + p::make_tuple(size), p::make_tuple(sizeof(T)), owner_capsule ); + dataarr = dataarr.reshape(tshape); + + return dataarr; + } + else{ + return np::empty(p::make_tuple(0), np::dtype::get_builtin());; + } +} + + + +#include +#include + +template +std::vector GenerateRandomVector(int NumberCount,T minimum=0, T maximum=1) { + std::random_device rd; + std::mt19937 gen(rd()); // these can be global and/or static, depending on how you use random elsewhere + + std::vector values(NumberCount); + std::uniform_real_distribution dis(minimum, maximum); + std::generate(values.begin(), values.end(), [&](){ return dis(gen); }); + return values; +} -TString prependXRootD(const TString& path); +#include +template +std::ostream& operator<<(std::ostream& output, std::vector const& values) +{ + for (auto const& value : values) + { + output << value << ' '; + } + output << std::endl; + return output; +} -bool isApprox(const float& a , const float& b, float eps=0.001); -float deltaPhi(const float& phi1, const float& phi2); #endif /* DEEPJET_MODULES_INTERFACE_HELPER_H_ */ diff --git a/compiled/interface/pythonToSTL.h b/compiled/interface/pythonToSTL.h index 916c207..6fcb4d5 100644 --- a/compiled/interface/pythonToSTL.h +++ b/compiled/interface/pythonToSTL.h @@ -9,7 +9,7 @@ #define DEEPJET_MODULES_INTERFACE_PYTHONTOSTL_H_ #include #include "boost/python/extract.hpp" -#include "boost/python/numeric.hpp" +#include "boost/python/numpy.hpp" #include "boost/python/list.hpp" #include "boost/python/str.hpp" #include @@ -25,7 +25,7 @@ std::vector toSTLVector(const boost::python::list lin){ } template<> -std::vector toSTLVector(const boost::python::list lin){ +inline std::vector toSTLVector(const boost::python::list lin){ std::vector out(boost::python::len(lin)); for(size_t i=0;i(lin[i]); @@ -48,7 +48,7 @@ std::vector > toSTL2DVector(const boost::python::list lin){ template<> -std::vector > toSTL2DVector(const boost::python::list lin){ +inline std::vector > toSTL2DVector(const boost::python::list lin){ std::vector > out; for(size_t i=0;i tmp(boost::python::len(lin[i])); diff --git a/compiled/interface/quicklzWrapper.h b/compiled/interface/quicklzWrapper.h new file mode 100644 index 0000000..58d2980 --- 
/dev/null +++ b/compiled/interface/quicklzWrapper.h @@ -0,0 +1,210 @@ +/* + * quicklzWrapper.h + * + * Created on: 5 Nov 2019 + * Author: jkiesele + */ + +#ifndef DEEPJETCORE_COMPILED_INTERFACE_QUICKLZWRAPPER_H_ +#define DEEPJETCORE_COMPILED_INTERFACE_QUICKLZWRAPPER_H_ + +#include "quicklz.h" +#include +#include +#include +#include +#include +#include "IO.h" +#include "version.h" +#include + +#define QUICKLZ_MAXCHUNK (0xffffffff - 400) + +namespace djc{ +template +class quicklz{ +public: + + quicklz(); + ~quicklz(); + + void reset(); + + //reads header, saves total uncompressed size + void readHeader(FILE *& ifile); + + //get uncompressed size to allocate memory if needed + //not in bytes but in terms of T + size_t getSize()const{return totalbytes_/sizeof(T);} + + //writes from compressed file to memory + //returns in terms of T how many elements have been read + size_t readCompressedBlock(FILE *& ifile, T * arr); + + //assumes you know the size that is supposed to be read + //and memory has been allocated already! + //returns in terms of T how many compressed elements have been read (without header) + size_t readAll(FILE *& ifile, T * arr); + + //skips over the next compressed block without reading it + size_t skipBlock(FILE *& ifile); + + //writes header and compressed data + //give size in terms of T + void writeCompressed(const T * arr, size_t size, FILE *& ofile); + + +private: + std::vector chunksizes_; + uint8_t nchunks_; + size_t totalbytes_; + qlz_state_decompress *state_decompress_; + qlz_state_compress *state_compress_; +}; + +template +quicklz::quicklz(){ + nchunks_=0; + totalbytes_=0; + state_decompress_ = new qlz_state_decompress(); + state_compress_ = new qlz_state_compress(); +} + + +template +quicklz::~quicklz(){ + delete state_decompress_; + delete state_compress_ ; +} + +template +void quicklz::reset(){ + chunksizes_.clear(); + nchunks_ = 0; + totalbytes_ = 0; + delete state_decompress_; + delete state_compress_; + state_decompress_ = new qlz_state_decompress(); + state_compress_ = new qlz_state_compress(); +} + +template +void quicklz::readHeader(FILE *& ifile) { + nchunks_ = 0; + chunksizes_.clear(); + totalbytes_ = 0; + float version = 0; + io::readFromFile(&version, ifile); + if(!checkVersionCompatible(version)) + throw std::runtime_error("quicklz::readHeader: incompatible version"); + io::readFromFile(&nchunks_, ifile); + chunksizes_ = std::vector(nchunks_, 0); + io::readFromFile(&chunksizes_[0], ifile, nchunks_); + io::readFromFile(&totalbytes_, ifile); +} + + + + +template +size_t quicklz::readCompressedBlock(FILE *& ifile, T * arr){ + + size_t chunk = 0; + size_t allread = 0; + char* src = 0; + char * dst = (char*)(void*)arr; + + while (chunk < nchunks_ && totalbytes_) { + //std::cout << "chunk with size " << chunksizes_.at(chunk) <<" size of " << sizeof(T) <<" total bytes "<< totalbytes_ << std::endl; + src = new char[chunksizes_.at(chunk)]; + io::readFromFile(src, ifile, 0, chunksizes_.at(chunk)); + size_t readbytes = qlz_size_decompressed(src); + //std::cout << "bytes to be decompressed " << readbytes << std::endl; + + allread += qlz_decompress(src, dst, state_decompress_); + //std::cout << "decompress success " << readbytes << " allread " << allread << std::endl; + chunk++; + dst += readbytes; + delete src; + } + if (allread != totalbytes_) { + std::string moreinfo = "\nexpected: "; + moreinfo += std::to_string(totalbytes_); + moreinfo += " got: "; + moreinfo += std::to_string(allread); + delete state_decompress_; + state_decompress_ = 0; + throw 
std::runtime_error(( + "quicklz::readCompressedBlock: expected size and uncompressed size don't match: "+moreinfo)); + } + return allread / sizeof(T); +} + + + +template +size_t quicklz::readAll(FILE *& ifile, T * arr) { + readHeader(ifile); + return readCompressedBlock(ifile, arr); +} + +template +size_t quicklz::skipBlock(FILE *& ifile){ + readHeader(ifile); + size_t totalbytescompressed = 0; + for(const auto& c:chunksizes_) + totalbytescompressed+=c; + fseek(ifile,totalbytescompressed,SEEK_CUR); + return totalbytescompressed; +} + +template +void quicklz::writeCompressed(const T * arr, size_t size, FILE *& ofile) { + + size_t length = size * sizeof(T); + const char *src = (const char*) (const void*) arr; + + //destination buffer + char *dst = new char[length + 400]; + size_t remaininglength = length; + size_t len2 = 0; + size_t startbyte = 0; + uint8_t nchunks = 1; + std::vector chunksizes; + + while (remaininglength) { + + size_t uselength = 0; + if (remaininglength > QUICKLZ_MAXCHUNK) { + uselength = QUICKLZ_MAXCHUNK; + remaininglength -= QUICKLZ_MAXCHUNK; + nchunks++; + if (!nchunks) { + throw std::runtime_error( + "quicklz::writeCompressed: array size too big (O(TB))!"); + } + + } else { + uselength = remaininglength; + remaininglength = 0; + } + size_t thissize = qlz_compress(&src[startbyte], &dst[len2], uselength, + state_compress_); + chunksizes.push_back(thissize); + len2 += thissize; + startbyte += uselength; + } + float version = DJCDATAVERSION; + io::writeToFile(&version,ofile); + io::writeToFile(&nchunks,ofile); + io::writeToFile(&chunksizes[0],ofile,chunksizes.size()); + io::writeToFile(&length, ofile); + io::writeToFile(dst, ofile, 0, len2); + + //end + delete dst; +} + +}//namespace + +#endif diff --git a/compiled/interface/rocCurve.h b/compiled/interface/rocCurve.h index e033198..bf21776 100644 --- a/compiled/interface/rocCurve.h +++ b/compiled/interface/rocCurve.h @@ -57,6 +57,9 @@ class rocCurve{ linewidth_=width; } + void scaleYAxis(const float& scale){yscale_=scale;} + const float& getYAxisScaling()const{return yscale_;} + const TString& name()const{return name_;} TString compatName()const{ TString namecp=name_; @@ -101,6 +104,9 @@ class rocCurve{ int linecol_,linewidth_,linestyle_; bool fullanalysis_; + float yscale_; + + double scaledSoftsign(double x, double scaler)const; }; diff --git a/compiled/interface/rocCurveCollection.h b/compiled/interface/rocCurveCollection.h index 8c7dd78..efe6955 100644 --- a/compiled/interface/rocCurveCollection.h +++ b/compiled/interface/rocCurveCollection.h @@ -53,12 +53,13 @@ class rocCurveCollection{ // const TString& vetotruth, int linecolstyle, const TString& cuts="",int linestyle=1); void addROC(const TString& name, const TString& probability, const TString& truth, - const TString& vetotruth, const TString& linecolstyle, const TString& cuts="",const TString& invalidateif=""); + const TString& vetotruth, const TString& linecolstyle, const TString& cuts="",const TString& invalidateif="", float yscale=1.); void addText(TLatex *l){additionaltext_.push_back(l);} void printRocs(TChain* c, const TString& outpdf,const TString&outfile="",TCanvas* cv=0, TFile * f=0, - std::vector* chainvec=0,double xmin_in=-1); + std::vector* chainvec=0,double xmin_in=-1, + TString experimentlabel="",TString lumilabel="",TString prelimlabel=""); private: TLegend * leg_; diff --git a/compiled/interface/simpleArray.h b/compiled/interface/simpleArray.h new file mode 100644 index 0000000..a959078 --- /dev/null +++ b/compiled/interface/simpleArray.h @@ -0,0 
+1,1373 @@ +/* + * simpleArray.h + * + * Created on: 5 Nov 2019 + * Author: jkiesele + */ + +#ifndef DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_SIMPLEARRAY_H_ +#define DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_SIMPLEARRAY_H_ + +#include +#include "boost/python/numpy.hpp" +#include "boost/python/list.hpp" +#include +#include "helper.h" +#include "pythonToSTL.h" + +#include "c_helper.h" +#include +#include +#include +#include +#include "quicklzWrapper.h" +#include //memcpy +#include "IO.h" +#include "version.h" +#include +#include +#include +#include + +namespace djc{ + + +//has all non-data operations +class simpleArrayBase { +public: + + enum dtypes{float32,int32,undef}; + + simpleArrayBase():size_(0),assigned_(false) { + } + virtual ~simpleArrayBase(){} + + simpleArrayBase(std::vector shape,const std::vector& rowsplits = {}); + + + // virtual simpleArrayBase& operator=(simpleArrayBase &&)=0; + + virtual void clear()=0; + + virtual void setShape(std::vector shape,const std::vector& rowsplits = {})=0; + + virtual dtypes dtype()const{return undef;} + int dtypeI()const{return (int)dtype();} + std::string dtypeString()const{ + return dtypeToString(dtype()); + } + + void setName(const std::string& name){name_=name;} + std::string name()const{return name_;} + void setFeatureNames(const std::vector& names){featnames_=names;} + const std::vector& featureNames()const{return featnames_;} + + virtual void fillZeros()=0; + + virtual void set(const size_t i, float val){throwWrongTypeSet();} + virtual void set(const size_t i, const size_t j, float val){throwWrongTypeSet();} + virtual void set(const size_t i, const size_t j, const size_t k, float val){throwWrongTypeSet();} + virtual void set(const size_t i, const size_t j, const size_t k, const size_t l, float val){throwWrongTypeSet();} + virtual void set(const size_t i, const size_t j, const size_t k, const size_t l, const size_t m, float val){throwWrongTypeSet();} + + virtual void set(const size_t i, int val){throwWrongTypeSet();} + virtual void set(const size_t i, const size_t j, int val){throwWrongTypeSet();} + virtual void set(const size_t i, const size_t j, const size_t k, int val){throwWrongTypeSet();} + virtual void set(const size_t i, const size_t j, const size_t k, const size_t l, int val){throwWrongTypeSet();} + virtual void set(const size_t i, const size_t j, const size_t k, const size_t l, const size_t m, int val){throwWrongTypeSet();} + + static std::string dtypeToString(dtypes t); + static dtypes stringToDtype(const std::string& s); + + const std::vector& shape() const { + return shape_; + } + + virtual bool hasNanOrInf()const=0; + + boost::python::list shapePy()const; + + const size_t& size() const { + return size_; + } + + bool isRagged()const{ + return rowsplits_.size()>0; + } + + /* + * returns the dimension of the first axis. + * If second dimension is ragged, this will take it into + * account. 
+ */ + size_t getFirstDimension()const{ + if(!size_ || !shape_.size()) + return 0; + return shape_.at(0); + } + + const std::vector& rowsplits() const { + return rowsplits_; + } + + virtual void assignShape(std::vector s)=0; + + + virtual size_t validSlices(std::vector splits)const=0; + virtual bool validSlice(size_t splitindex_begin, size_t splitindex_end)const=0; + + virtual void addToFileP(FILE *& ofile) const=0; + virtual void readFromFileP(FILE *& ifile,bool skip_data=false)=0; + virtual void writeToFile(const std::string& f)const=0; + virtual void readFromFile(const std::string& f)=0; + + void skipToNextArray(FILE *& ofile)const; + /** + * this goes back to the start of the header! + */ + std::string readDtypeFromFileP(FILE *& ofile)const; + std::string readDtypeFromFile(const std::string& f)const; + + dtypes readDtypeTypeFromFileP(FILE *& ofile)const; + dtypes readDtypeTypeFromFile(const std::string& f)const; + + virtual void cout()const=0; + + virtual void append(const simpleArrayBase& )=0; + + /** + * Split indices can directly be used with the split() function. + * Returns elements e.g. {2,5,3,2}, which corresponds to DataSplitIndices of {2,7,10,12} + */ + static std::vector getSplitIndices(const std::vector & rowsplits, size_t nelements_limit, + bool sqelementslimit, bool strict_limit, std::vector& size_ok, std::vector& nelemtns_per_split); + + /** + * Split indices can directly be used with the split() function. + * Returns row splits e.g. {2,7,10,12} which corresponds to Split indices of {2,5,3,2} + */ + static std::vector getDataSplitIndices(const std::vector & rowsplits, size_t nelements_limit, + bool sqelementslimit, bool strict_limit, std::vector& size_ok, std::vector& nelemtns_per_split); + + /** + * Transforms row splits to n_elements per ragged sample + */ + static std::vector dataSplitToSplitIndices(const std::vector& row_splits); + + /** + * Transforms n_elements per ragged sample to row splits + */ + static std::vector splitToDataSplitIndices(const std::vector& data_splits); + + + static std::vector readRowSplitsFromFileP(FILE *& f, bool seeknext=true); + + static std::vector mergeRowSplits(const std::vector & rowsplitsa, const std::vector & rowsplitsb); + + static std::vector splitRowSplits(std::vector & rowsplits, const size_t& splitpoint); + + + int isize() const { + return (int)size_; + } + + + + + //does not transfer data ownership! only for quick writing etc. 
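+    //(assignFromNumpy wraps the existing numpy buffer without copying or taking
+    // ownership, createFromNumpy makes an owned copy, transferToNumpy hands the
+    // buffer over to numpy and empties this array, copyToNumpy returns a copy)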
+ virtual void assignFromNumpy(const boost::python::numpy::ndarray& ndarr, + const boost::python::numpy::ndarray& rowsplits=boost::python::numpy::empty( + boost::python::make_tuple(0), boost::python::numpy::dtype::get_builtin()))=0; + + //copy data + virtual void createFromNumpy(const boost::python::numpy::ndarray& ndarr, + const boost::python::numpy::ndarray& rowsplits=boost::python::numpy::empty( + boost::python::make_tuple(0), boost::python::numpy::dtype::get_builtin()))=0; + + //transfers data ownership and cleans simpleArray instance + virtual boost::python::tuple transferToNumpy(bool pad_rowsplits=false)=0; + + //copy data + virtual boost::python::tuple copyToNumpy(bool pad_rowsplits=false)const=0; + + virtual void setFeatureNamesPy(boost::python::list l)=0; + virtual boost::python::list featureNamesPy()=0; + + +protected: + std::vector shape_; + std::string name_; + std::vector featnames_; + //this is int64 for better feeding to TF + std::vector rowsplits_; + size_t size_; + bool assigned_; + + + size_t sizeFromShape(const std::vector& shape) const; + std::vector shapeFromRowsplits()const; //split dim = 1! + void checkShape(size_t ndims)const; + void checkSize(size_t idx)const; + void checkRaggedIndex(size_t i, size_t j)const; + + void getFlatSplitPoints(size_t splitindex_begin, size_t splitindex_end, + size_t & splitpoint_start, size_t & splitpoint_end)const; + +private: + + void throwWrongTypeSet()const{throw std::invalid_argument("simpleArrayBase::set: wrong data format");} + + static std::vector priv_getSplitIndices(bool datasplit, const std::vector & rowsplits, size_t nelements_limit, + bool sqelementslimit, std::vector& size_ok, std::vector& nelemtns_per_split, bool strict_limit); + + +}; + + +template +class simpleArray: public simpleArrayBase { //inherits and implements data operations +public: + + + simpleArray(); + // row splits are indicated by a merged dimension with negative sign + // e.g. A x B x C x D, where B is ragged would get shape + // A x -nElementsTotal x C x D + // ROW SPLITS START WITH 0 and end with the total number of elements along that dimension + // therefore, the rosplits vector size is one more than the first dimension + // + // Only ONLY DIMENSION 1 AS RAGGED DIMENSION is supported, first dimension MUST NOT be ragged. + // + + simpleArray(std::vector shape,const std::vector& rowsplits = {}); + simpleArray(FILE *& ); + ~simpleArray(); + + simpleArray(const simpleArray&); + simpleArray& operator=(const simpleArray&); + + simpleArray(simpleArray &&); + simpleArray& operator=(simpleArray &&); + + dtypes dtype()const; + + bool operator==(const simpleArray& rhs)const; + bool operator!=(const simpleArray& rhs)const { return !(*this == rhs); } + + void clear(); + + bool hasNanOrInf()const; + + //reshapes if possible, creates new else + void setShape(std::vector shape,const std::vector& rowsplits = {}); + + T * data() const { + return data_; + } + + T * data() { + return data_; + } + + /////////// potentially dangerous operations for conversions, use with care /////// + + /* + * Move data memory location to another object + */ + T * disownData(); + + /* + * Object will not own the data. Merely useful for conversion + * with immediate writing to file + */ + void assignData(T *d){ + if(data_ && !assigned_) + delete data_; + data_=d; + assigned_=true; + } + + /* + * Assigns a shape without checking it or creating a new data + * array. 
Will recalculate total size + */ + void assignShape(std::vector s){ + shape_=s; + size_ = sizeFromShape(s); + } + + /* + * Splits on first axis. + * Returns the first part, leaves the second. + * does memcopy for both pats now + */ + simpleArray split(size_t splitindex); + + simpleArray getSlice(size_t splitindex_begin, size_t splitindex_end) const; + + /* + * + */ + size_t validSlices(std::vector splits)const; + bool validSlice(size_t splitindex_begin, size_t splitindex_end)const; + + + simpleArray shuffle(const std::vector& shuffle_idxs)const; + /* + * appends along first axis + * Cann append to an empty array (same as copy) + */ + void append(const simpleArray& a); + void append(const simpleArrayBase& ); + + + + /* file IO here + * format: non compressed header (already writing rowsplits!): + * size, shape.size(), [shape], rowsplits.size(), [rowsplits], compr: [data] + * + */ + void addToFileP(FILE *& ofile) const; + void readFromFileP(FILE *& ifile,bool skip_data=false); + + void writeToFile(const std::string& f)const; + void readFromFile(const std::string& f); + + + void cout()const; + + + + size_t sizeAt(size_t i)const; + // higher dim row splits size_t sizeAt(size_t i,size_t j)const; + // higher dim row splits size_t sizeAt(size_t i,size_t j, size_t k)const; + // higher dim row splits size_t sizeAt(size_t i,size_t j, size_t k, size_t l)const; + // higher dim row splits size_t sizeAt(size_t i,size_t j, size_t k, size_t l, size_t m)const; + + /* + * Does not work (yet) with ragged arrays! + * Will just produce garbage! + */ + + T & at(size_t i); + const T & at(size_t i)const; + T & at(size_t i, size_t j); + const T & at(size_t i, size_t j)const; + T & at(size_t i, size_t j, size_t k); + const T & at(size_t i, size_t j, size_t k)const; + T & at(size_t i, size_t j, size_t k, size_t l); + const T & at(size_t i, size_t j, size_t k, size_t l)const; + T & at(size_t i, size_t j, size_t k, size_t l, size_t m); + const T & at(size_t i, size_t j, size_t k, size_t l, size_t m)const; + T & at(size_t i, size_t j, size_t k, size_t l, size_t m, size_t n); + const T & at(size_t i, size_t j, size_t k, size_t l, size_t m, size_t n)const; + + void fillZeros(); + + void set(const size_t i, T val){at(i)=val;} + void set(const size_t i, const size_t j, T val){at(i,j)=val;} + void set(const size_t i, const size_t j, const size_t k, T val){at(i,j,k)=val;} + void set(const size_t i, const size_t j, const size_t k, const size_t l, T val){at(i,j,k,l)=val;} + void set(const size_t i, const size_t j, const size_t k, const size_t l, const size_t m, T val){at(i,j,k,l,m)=val;} + + + + + //does not transfer data ownership! only for quick writing etc. 
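+    //(an empty rowsplits array, which is the default, means the input is not
+    // ragged, cf. isRagged())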
+ void assignFromNumpy(const boost::python::numpy::ndarray& ndarr, + const boost::python::numpy::ndarray& rowsplits=boost::python::numpy::empty( + boost::python::make_tuple(0), boost::python::numpy::dtype::get_builtin())); + + //copy data + void createFromNumpy(const boost::python::numpy::ndarray& ndarr, + const boost::python::numpy::ndarray& rowsplits=boost::python::numpy::empty( + boost::python::make_tuple(0), boost::python::numpy::dtype::get_builtin())); + + //transfers data ownership and cleans simpleArray instance + boost::python::tuple transferToNumpy(bool pad_rowsplits=false); + + //copy data + boost::python::tuple copyToNumpy(bool pad_rowsplits=false)const; + + void setFeatureNamesPy(boost::python::list l); + boost::python::list featureNamesPy(); + + + +private: + size_t flatindex(size_t i, size_t j)const; + size_t flatindex(size_t i, size_t j, size_t k)const; + size_t flatindex(size_t i, size_t j, size_t k, size_t l)const; + size_t flatindex(size_t i, size_t j, size_t k, size_t l, size_t m)const; + size_t flatindex(size_t i, size_t j, size_t k, size_t l, size_t m, size_t n)const; + + std::vector padRowsplits()const; + + + void copyFrom(const simpleArray& a); + void moveFrom(simpleArray && a); + + + + + std::vector makeNumpyShape()const; + void checkArray(const boost::python::numpy::ndarray& ndarr, + boost::python::numpy::dtype dt=boost::python::numpy::dtype::get_builtin())const; + void fromNumpy(const boost::python::numpy::ndarray& ndarr, + const boost::python::numpy::ndarray& rowsplits, + bool copy); + + + T * data_; +}; + +/* for later +template +class simpleArrayIndex { +public: + simpleArrayIndex(simpleArray& a, const int i):arr_(a){ + + } + simpleArrayIndex(const T&){ + //set value + } + + operator T&() { return val; } + operator T() const { return val; } + + simpleArrayIndex operator[](const int i){ + return simpleArrayIndex(arr_,i); + } + +private: + simpleArray& arr_; + //some indexing +}; +*/ + +template +simpleArray::simpleArray() : +simpleArrayBase(), + data_(0) { +} + +template +simpleArray::simpleArray(std::vector shape,const std::vector& rowsplits) : +simpleArrayBase(shape,rowsplits) { + data_ = new T[size_]; +} + +template +simpleArray::simpleArray(FILE *& ifile):simpleArray(){ + data_=0; + readFromFileP(ifile); + assigned_=false; +} + +template +simpleArray::~simpleArray() { + clear(); +} + +template +simpleArray::simpleArray(const simpleArray& a) : + simpleArray() { + data_=0; + copyFrom(a); +} + +template +simpleArray& simpleArray::operator=(const simpleArray& a) { + copyFrom(a); + return *this; +} + +template +simpleArray::simpleArray(simpleArray && a) : + simpleArray() { + if (&a == this){ + return;} + if (data_&& !assigned_) + delete data_; + name_=a.name_; + featnames_=a.featnames_; + data_ = a.data_; + a.data_ = 0; + assigned_ = a.assigned_; + size_ = a.size_; + a.size_ = 0; + shape_ = std::move(a.shape_); + a.shape_ = std::vector(); + rowsplits_ = std::move(a.rowsplits_); + a.rowsplits_= std::vector(); + a.clear(); +} + + +template +simpleArray& simpleArray::operator=(simpleArray && a) { + if (&a == this) + return *this; + if (data_ && !assigned_) + delete data_; + name_=a.name_; + featnames_=a.featnames_; + data_ = a.data_; + a.data_ = 0; + size_ = a.size_; + assigned_ = a.assigned_; + a.size_ = 0; + shape_ = std::move(a.shape_); + a.shape_ = std::vector(); + rowsplits_ = std::move(a.rowsplits_); + a.rowsplits_= std::vector(); + return *this; +} + +template +bool simpleArray::operator==(const simpleArray& rhs)const{ + if(this == &rhs) + return true; + 
if(dtype() !=rhs.dtype()) + return false; + if(name_!=rhs.name_) + return false; + if(featnames_!=rhs.featnames_) + return false; + if(size_!=rhs.size_) + return false; + if(shape_!=rhs.shape_) + return false; + if(rowsplits_!=rhs.rowsplits_) + return false; + //finally check data + for(size_t i=0;i +void simpleArray::clear() { + if (data_&& !assigned_) + delete data_; + data_ = 0; + shape_.clear(); + rowsplits_.clear(); + size_ = 0; + name_=""; + featnames_.clear(); +} + +template +bool simpleArray::hasNanOrInf()const{ + for(size_t i=0;i +void simpleArray::setShape(std::vector shape,const std::vector& rowsplits) { + if(rowsplits.size()){ + *this = simpleArray(shape,rowsplits); + } + int size = sizeFromShape(shape); + if (size != size_) { + *this = simpleArray(shape); + } else if (size == size_) { + shape_ = shape; + } +} + + +template +T * simpleArray::disownData() { + T * dp = data_; + data_ = 0; + return dp; +} + +/* + * Splits on first axis. + * Returns the first part, leaves the second + * for ragged it is the number of elements index - need to be consistent with the rowplits + * + * add function 'size_t getClosestSplitPoint(size_t splitnelements, bool down=True)' + * + * for ragged, the split point is the INDEX IN THE ROWSPLIT VECTOR! + * + */ +template +simpleArray simpleArray::split(size_t splitindex) { + simpleArray out; + if (!shape_.size() || ( !isRagged() && splitindex > shape_.at(0))) { + std::stringstream errMsg; + errMsg << "simpleArray::split: splitindex > shape_[0] : "; + if(shape_.size()) + errMsg << splitindex << ", " << shape_.at(0); + else + errMsg <<"shape size: " << shape_.size() <<" empty array cannot be split."; + cout(); + throw std::runtime_error( + errMsg.str().c_str()); + } + if(splitindex == shape_.at(0)){//exactly the whole array + out = *this; + clear(); + return out; + } + + if(isRagged() && splitindex > rowsplits_.size()){ + std::cout << "split index " << splitindex << " range: " << rowsplits_.size()<< std::endl; + throw std::runtime_error( + "simpleArray::split: ragged split index out of range"); + } + + + //get split point for data + ///insert rowsplit logic below + size_t splitpoint = splitindex; + if(isRagged()){ + splitpoint = rowsplits_.at(splitindex); + for (size_t i = 2; i < shape_.size(); i++) + splitpoint *= (size_t)std::abs(shape_.at(i)); + } + else{ + for (size_t i = 1; i < shape_.size(); i++) + splitpoint *= (size_t)std::abs(shape_.at(i)); + } + + + size_t remaining = size_ - splitpoint; + + T * odata = new T[splitpoint]; + T * rdata = new T[remaining]; + + memcpy(odata, data_, splitpoint * sizeof(T)); + memcpy(rdata, data_ + splitpoint, remaining * sizeof(T)); + if(!assigned_) + delete data_; + out.data_ = odata; + data_ = rdata; + ///insert rowsplit logic below + out.shape_ = shape_; + out.shape_.at(0) = splitindex; + shape_.at(0) = shape_.at(0) - splitindex; + if(isRagged()){ + + out.rowsplits_ = splitRowSplits(rowsplits_, splitindex); + out.shape_ = out.shapeFromRowsplits(); + shape_ = shapeFromRowsplits(); + } + /// + out.size_ = sizeFromShape(out.shape_); + size_ = sizeFromShape(shape_); + out.featnames_ = featnames_; + out.name_ = name_; + return out; +} + + + +template +simpleArray simpleArray::getSlice(size_t splitindex_begin, size_t splitindex_end) const{ + simpleArray out; + if (!shape_.size() || ( !isRagged() && (splitindex_end > shape_.at(0) || splitindex_begin > shape_.at(0))) ) { + std::stringstream errMsg; + errMsg << "simpleArray::slice: splitindex_end > shape_[0] : "; + if(shape_.size()) + errMsg << splitindex_end << ", " << 
shape_.at(0); + else + errMsg <<"shape size: " << shape_.size() <<" empty array cannot be split."; + cout(); + throw std::runtime_error( + errMsg.str().c_str()); + } + if(splitindex_end == shape_.at(0) && splitindex_begin==0){//exactly the whole array + out = *this; + return out; + } + + if(isRagged() && (splitindex_end >= rowsplits_.size() || splitindex_begin>= rowsplits_.size())){ + std::cout << "split index " << splitindex_end << " - "<< splitindex_begin<< " allowed: " << rowsplits_.size()<< std::endl; + throw std::runtime_error( + "simpleArray::slice: ragged split index out of range"); + } + if(splitindex_end == splitindex_begin){ + //throw std::runtime_error("simpleArray::slice: attempt to create empty slice"); + //actually, allow this here and let the problem be handled further down the line, just throw warning for now + std::cout << "simpleArray::slice: WARNING: attempt to create empty slice at "<< splitindex_begin < +size_t simpleArray::validSlices(std::vector splits)const{ + size_t out=0; + if(!isRagged()){ + while(splits.at(out) <= shape_.at(0) && out< splits.size()) + out++; + return out; + } + else{ + while(splits.at(out) < rowsplits_.size() && out < splits.size()) + out++; + return out; + } +} + +template +bool simpleArray::validSlice(size_t splitindex_begin, size_t splitindex_end)const{ + if (!shape_.size() || ( !isRagged() && (splitindex_end > shape_.at(0) || splitindex_begin > shape_.at(0))) ) + return false; + if(isRagged() && (splitindex_end >= rowsplits_.size() || splitindex_begin>= rowsplits_.size())) + return false; + return true; +} + + + +template +simpleArray simpleArray::shuffle(const std::vector& shuffle_idxs)const{ + //check + bool isvalid = true; + for(const auto& idx: shuffle_idxs){ + isvalid &= validSlice(idx,idx+1); + } + if(!isvalid) + throw std::runtime_error("simpleArray::shuffle: indices not valid"); + + //copy data + auto out=*this; + size_t next=0; + for(const auto idx: shuffle_idxs){ + + size_t source_splitpoint_start, source_splitpoint_end; + getFlatSplitPoints(idx,idx+1, + source_splitpoint_start, source_splitpoint_end ); + size_t n_elements = source_splitpoint_end-source_splitpoint_start; + memcpy(out.data_+next, + data_+source_splitpoint_start,n_elements * sizeof(T)); + + next+=n_elements; + } + //recreate row splits + if(isRagged()){ + auto nelems = dataSplitToSplitIndices(rowsplits_); + auto new_nelems=nelems; + for(size_t i=0;i +void simpleArray::append(const simpleArray& a) { + + if (!data_ && size_ == 0) { + //just save feature names and name + auto namesv = name_; + auto fnamesv = featnames_; + *this = a; + name_=namesv; + featnames_ = fnamesv; + return; + } + if (shape_.size() != a.shape_.size()) + throw std::out_of_range( + "simpleArray::append: shape dimensions don't match"); + if(isRagged() != a.isRagged()) + throw std::out_of_range( + "simpleArray::append: can't append ragged to non ragged or vice versa"); + + std::vector targetshape; + if (shape_.size() > 1 && a.shape_.size() > 1) { + size_t offset = 1; + if(isRagged()) + offset = 2; + + std::vector highshape = std::vector(shape_.begin() + offset, + shape_.end()); + std::vector ahighshape = std::vector(a.shape_.begin() + offset, + a.shape_.end()); + if (highshape != ahighshape) { + throw std::out_of_range( + "simpleArray::append: all shapes but first axis must match"); + } + targetshape.push_back(shape_.at(0) + a.shape_.at(0)); + if(isRagged()) + targetshape.push_back(-1); + targetshape.insert(targetshape.end(), highshape.begin(), + highshape.end()); + } else { + 
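+/*
+ * Sketch of the non-destructive accessors above (indices illustrative):
+ *
+ *   simpleArray_float32 part  = arr.getSlice(10, 20);  // first-axis elements 10..19, arr unchanged
+ *   std::vector<size_t> order = {2, 0, 1};             // each entry must satisfy validSlice(i, i+1)
+ *   simpleArray_float32 mixed = arr.shuffle(order);    // reordered copy; rowsplits rebuilt if ragged
+ */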
targetshape.push_back(shape_.at(0) + a.shape_.at(0)); + } + + T * ndata = new T[size_ + a.size_]; + memcpy(ndata, data_, size_ * sizeof(T)); + memcpy(ndata + size_, a.data_, a.size_ * sizeof(T)); + if(!assigned_) + delete data_; + data_ = ndata; + size_ = size_ + a.size_; + ///insert rowsplit logic below + shape_ = targetshape; + //recalculate -XxY part of the shape + //append the row splits if dimensions match (- on same axis) + /// + if(isRagged()){ + //need copy in case this == &a + auto ars = a.rowsplits_; + + rowsplits_ = mergeRowSplits(rowsplits_, ars); + + shape_ = shapeFromRowsplits();//last + } +} + +template +void simpleArray::append(const simpleArrayBase& arr){ + if(dtype() != arr.dtype()) + throw std::runtime_error("simpleArray::append: needs to be same dtype"); + append(dynamic_cast &>(arr)); +} + + +template +void simpleArray::addToFileP(FILE *& ofile) const { + + float version = DJCDATAVERSION; + io::writeToFile(&version, ofile); + auto tdtype = dtype(); + io::writeToFile(&tdtype, ofile); + io::writeToFile(&name_, ofile); + io::writeToFile(&featnames_, ofile); + io::writeToFile(&size_, ofile); + size_t ssize = shape_.size(); + io::writeToFile(&ssize, ofile); + io::writeToFile(&shape_[0], ofile, shape_.size()); + + size_t rssize = rowsplits_.size(); + io::writeToFile(&rssize, ofile); + + if(rssize){ + quicklz iqlz; + iqlz.writeCompressed(&rowsplits_[0],rssize , ofile); + } + quicklz qlz; + qlz.writeCompressed(data_, size_, ofile); + +} + +template +void simpleArray::readFromFileP(FILE *& ifile, bool skip_data) { + clear(); + + float version = 0; + io::readFromFile(&version, ifile); + + if(!checkVersionCompatible(version)){ + throw std::runtime_error("simpleArray::readFromFile: wrong format version"); + } + dtypes rdtype=dtype(); + if(checkVersionStrict(version)){ + io::readFromFile(&rdtype, ifile); + io::readFromFile(&name_, ifile); + io::readFromFile(&featnames_, ifile); + } + if(rdtype!=dtype()) + throw std::runtime_error("simpleArray::readFromFileP: wrong dtype"); + + io::readFromFile(&size_, ifile); + + size_t shapesize = 0; + io::readFromFile(&shapesize, ifile); + shape_ = std::vector(shapesize, 0); + io::readFromFile(&shape_[0], ifile, shapesize); + + size_t rssize = 0; + io::readFromFile(&rssize, ifile); + rowsplits_ = std::vector(rssize, 0); + + if(rssize){ + quicklz iqlz; + iqlz.readAll(ifile, &rowsplits_[0]); + } + + quicklz qlz; + if(skip_data){ + if(rowsplits_.size()) + rowsplits_={(int64_t)0}; + else + rowsplits_.clear(); + data_=0; + size_=0; + shape_.at(0)=0; + qlz.skipBlock(ifile); + return; + } + + data_ = new T[size_]; + size_t nread = qlz.readAll(ifile, data_); + if (nread != size_) + throw std::runtime_error( + "simpleArray::readFromFile: expected and observed length don't match"); + +} + + + +template +void simpleArray::writeToFile(const std::string& f)const{ + FILE *ofile = fopen(f.data(), "wb"); + float version = DJCDATAVERSION; + io::writeToFile(&version, ofile); + addToFileP(ofile); + fclose(ofile); + +} +template +void simpleArray::readFromFile(const std::string& f){ + clear(); + FILE *ifile = fopen(f.data(), "rb"); + if(!ifile) + throw std::runtime_error("simpleArray::readFromFile: file "+f+" could not be opened."); + float version = 0; + io::readFromFile(&version, ifile); + if(!checkVersionCompatible(version)) + throw std::runtime_error("simpleArray::readFromFile: wrong format version: "+std::to_string(version)); + readFromFileP(ifile); + fclose(ifile); +} + +template +void simpleArray::cout()const{ + std::cout << "name: " << name_ << std::endl; + 
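+/*
+ * Sketch of the single-array file round trip implemented above (the file name
+ * is illustrative): writeToFile() stores a version header followed by dtype,
+ * name, feature names, shape, rowsplits and the quicklz-compressed payload;
+ * readFromFile() restores it and throws on a version or dtype mismatch.
+ *
+ *   arr.writeToFile("features.djca");
+ *   simpleArray_float32 restored;
+ *   restored.readFromFile("features.djca");
+ */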
for(int i=0;i +void simpleArray::copyFrom(const simpleArray& a) { + + if (&a == this) { + return; + } + if (data_&& !assigned_) + delete data_; + name_=a.name_; + featnames_=a.featnames_; + data_ = new T[a.size_]; + memcpy(data_, a.data_, a.size_ * sizeof(T)); + + size_ = a.size_; + shape_ = a.shape_; + rowsplits_ = a.rowsplits_; + assigned_=false; +} + + + +// rowsplit support being added here (see whiteboard) +template +size_t simpleArray::flatindex(size_t i, size_t j)const{ + size_t flat = 0; + if(isRagged()){ + checkRaggedIndex(i,j); + flat = rowsplits_.at(i)+j;} + else{ + flat = j + shape_.at(1)*i;} + return flat; +} + +//this can also be ragged +template +size_t simpleArray::flatindex(size_t i, size_t j, size_t k)const{ + size_t flat = 0; + if(isRagged()){ + checkRaggedIndex(i,j); + flat = k + shape_.at(2)*(rowsplits_.at(i)+j);} + else{ + flat = k + shape_.at(2)*(j + shape_.at(1)*i);} + return flat; +} +template +size_t simpleArray::flatindex(size_t i, size_t j, size_t k, size_t l)const{ + size_t flat = 0; + if(isRagged()){ + checkRaggedIndex(i,j); + flat = l + shape_.at(3)*(k + shape_.at(2)*(rowsplits_.at(i)+j));} + else{ + flat = l + shape_.at(3)*(k + shape_.at(2)*(j + shape_.at(1)*i));} + return flat; +} +template +size_t simpleArray::flatindex(size_t i, size_t j, size_t k, size_t l, size_t m)const{ + size_t flat = 0; + if(isRagged()){ + checkRaggedIndex(i,j); + flat = m + shape_.at(4)*(l + shape_.at(3)*(k + shape_.at(2)*(rowsplits_.at(i)+j)));} + else{ + flat = m + shape_.at(4)*(l + shape_.at(3)*(k + shape_.at(2)*(j + shape_.at(1)*i)));} + return flat; +} +template +size_t simpleArray::flatindex(size_t i, size_t j, size_t k, size_t l, size_t m, size_t n)const{ + size_t flat = 0; + if(isRagged()){ + checkRaggedIndex(i,j); + flat = n + shape_.at(5)*(m + shape_.at(4)*(l + shape_.at(3)*(k + shape_.at(2)*(rowsplits_.at(i)+j))));} + else{ + flat = n + shape_.at(5)*(m + shape_.at(4)*(l + shape_.at(3)*(k + shape_.at(2)*(j + shape_.at(1)*i))));} + return flat; +} + + +//no row split support here!! needs to be added! +//relatively easy if dimension 1 is row split. 
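+/*
+ * Worked example of the index arithmetic above (shape and indices illustrative):
+ * for a dense array of shape (N, 4, 3), flatindex(i=1, j=2, k=0) is
+ *     k + 3*(j + 4*i) = 0 + 3*(2 + 4) = 18,
+ * while for a ragged array the row offset shape[1]*i is replaced by
+ * rowsplits[i], e.g. with rowsplits {0, 2, 5, ...} the same call gives
+ *     0 + 3*(2 + 2) = 12.
+ */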
other dimensions harder + + +template +size_t simpleArray::sizeAt(size_t i)const{ + checkShape(2); + if(!isRagged()) + return shape_.at(1); + checkRaggedIndex(i,0); + return rowsplits_.at(i+1)-rowsplits_.at(i); +} + + +template +T & simpleArray::at(size_t i){ + checkShape(1); + checkSize(i); + return data_[i]; +} + +template +const T & simpleArray::at(size_t i)const{ + checkShape(1); + checkSize(i); + return data_[i]; +} + +template +T & simpleArray::at(size_t i, size_t j){ + checkShape(2); + size_t flat = flatindex(i,j); + checkSize(flat); + return data_[flat]; +} + +template +const T & simpleArray::at(size_t i, size_t j)const{ + checkShape(2); + size_t flat = flatindex(i,j); + checkSize(flat); + return data_[flat]; +} + +template +T & simpleArray::at(size_t i, size_t j, size_t k){ + checkShape(3); + size_t flat = flatindex(i,j,k); + checkSize(flat); + return data_[flat]; +} + +template +const T & simpleArray::at(size_t i, size_t j, size_t k)const{ + checkShape(3); + size_t flat = flatindex(i,j,k); + checkSize(flat); + return data_[flat]; +} + +template +T & simpleArray::at(size_t i, size_t j, size_t k, size_t l){ + checkShape(4); + size_t flat = flatindex(i,j,k,l); + checkSize(flat); + return data_[flat]; +} + +template +const T & simpleArray::at(size_t i, size_t j, size_t k, size_t l)const{ + checkShape(4); + size_t flat = flatindex(i,j,k,l); + checkSize(flat); + return data_[flat]; +} + +template +T & simpleArray::at(size_t i, size_t j, size_t k, size_t l, size_t m){ + checkShape(5); + size_t flat = flatindex(i,j,k,l,m); + checkSize(flat); + return data_[flat]; +} + +template +const T & simpleArray::at(size_t i, size_t j, size_t k, size_t l, size_t m)const{ + checkShape(5); + size_t flat = flatindex(i,j,k,l,m); + checkSize(flat); + return data_[flat]; +} + +template +T & simpleArray::at(size_t i, size_t j, size_t k, size_t l, size_t m, size_t n){ + checkShape(6); + size_t flat = flatindex(i,j,k,l,m,n); + checkSize(flat); + return data_[flat]; +} + +template +const T & simpleArray::at(size_t i, size_t j, size_t k, size_t l, size_t m, size_t n)const{ + checkShape(6); + size_t flat = flatindex(i,j,k,l,m,n); + checkSize(flat); + return data_[flat]; +} + + +template +void simpleArray::fillZeros(){ + for(size_t i=0;i +std::vector simpleArray::makeNumpyShape()const{ + if(!isRagged()) + return shape_; + std::vector out; + for(size_t i=1;i +void simpleArray::checkArray(const boost::python::numpy::ndarray& ndarr, + boost::python::numpy::dtype dt)const{ + namespace p = boost::python; + namespace np = boost::python::numpy; + + if(ndarr.get_dtype() != dt){ + std::string dts = p::extract(p::str(ndarr.get_dtype())); + std::string dtse = p::extract(p::str(dt)); + std::cout <<"input has dtype "<< dts << " expected " << dtse<< std::endl; + throw std::runtime_error("simpleArray::checkArray: at least one array does not have right type. (e.g. 
row split must be int64)"); + } + auto flags = ndarr.get_flags(); + if(!(flags & np::ndarray::CARRAY) || !(flags & np::ndarray::C_CONTIGUOUS)){ + throw std::runtime_error("simpleArray::checkArray: at least one array is not C contiguous, please pass as numpy.ascontiguousarray(a, dtype='float32')"); + } +} + +template +void simpleArray::fromNumpy(const boost::python::numpy::ndarray& ndarr, + const boost::python::numpy::ndarray& rowsplits, bool copy){ + namespace p = boost::python; + namespace np = boost::python::numpy; + + clear(); + checkArray(ndarr, np::dtype::get_builtin()); + + T * npdata = (T*)(void*) ndarr.get_data(); + data_ = npdata; + + int ndim = ndarr.get_nd(); + std::vector shape; + for(int s=0;s0){ + checkArray(rowsplits, np::dtype::get_builtin()); + rowsplits_.resize(len(rowsplits)); + memcpy(&(rowsplits_.at(0)),(int64_t*)(void*) rowsplits.get_data(), rowsplits_.size() * sizeof(int64_t)); + //check if row splits make sense + if(shape.at(0) != rowsplits_.at(rowsplits_.size()-1)){ + throw std::out_of_range("simpleArray::fromNumpy: row splits and input array incompatible. rowsplits[-1] != arr.shape[0]."); + } + shape.insert(shape.begin(),len(rowsplits)-1); + shape_ = shape; + shape_ = shapeFromRowsplits(); + } + else{ + shape_ = shape; + } + size_ = sizeFromShape(shape_); + + if(copy){ + assigned_=false; + data_ = new T[size_]; + memcpy(data_, npdata, size_* sizeof(T)); + } + else{ + assigned_=true; + } +} + +template +void simpleArray::assignFromNumpy(const boost::python::numpy::ndarray& ndarr, + const boost::python::numpy::ndarray& rowsplits){ + fromNumpy(ndarr,rowsplits, false); +} +template +void simpleArray::createFromNumpy(const boost::python::numpy::ndarray& ndarr, + const boost::python::numpy::ndarray& rowsplits){ + fromNumpy(ndarr,rowsplits, true); +} + + +inline void destroyManagerCObject(PyObject* self) { + auto * b = reinterpret_cast( PyCapsule_GetPointer(self, NULL) ); + delete [] b; +} + +template +std::vector simpleArray::padRowsplits()const{ //rs 0, 1, 1 element + std::vector out = rowsplits_; + if(out.size()){ + size_t presize = rowsplits_.size(); + size_t nelements = rowsplits_.at(rowsplits_.size()-1); + if((nelements<1 && !shape_.size()) || nelements!=-shape_.at(1)){ + throw std::runtime_error("simpleArray::padRowsplits: rowsplit format seems broken. Total number of entries at last entry: "+ + to_str(nelements)+" row splits: "+to_str(rowsplits_)+ " versus actual shape "+to_str(shape_)); + } + if(nelements<3)//keep format of [rs ], nelements + nelements=3; + out.resize(nelements,0); + out.at(out.size()-1) = presize; + } + return out; +} + +//transfers data ownership and cleans simpleArray instance +template +boost::python::tuple simpleArray::transferToNumpy(bool pad_rowsplits){ + namespace p = boost::python; + namespace np = boost::python::numpy; + + auto shape = makeNumpyShape(); + T * data_ptr = disownData(); + + np::ndarray dataarr = STLToNumpy(data_ptr, shape, size(), false); + if(pad_rowsplits){ + auto rsp = padRowsplits(); + np::ndarray rowsplits = STLToNumpy(&(rsp[0]), {(int)rsp.size()}, rsp.size(), true); + clear(); + return p::make_tuple(dataarr,rowsplits); + } + //don't check. 
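+/*
+ * (Note on the numpy bridge in this file: createFromNumpy() copies the ndarray
+ *  buffer into memory owned by the simpleArray, while assignFromNumpy() only
+ *  wraps the existing buffer (assigned_ = true), so the ndarray must stay alive
+ *  and is never freed here. In the other direction, transferToNumpy() hands the
+ *  data pointer to numpy and clears this array, while copyToNumpy() leaves it
+ *  untouched and returns an independent copy.)
+ */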
if rowsplits_.size()==0 function will return empty array and igonre invalid pointer + np::ndarray rowsplits = STLToNumpy(&(rowsplits_[0]), {(int)rowsplits_.size()}, rowsplits_.size(), true); + clear();//reset all + return p::make_tuple(dataarr,rowsplits); +} + +//cpoies data +template +boost::python::tuple simpleArray::copyToNumpy(bool pad_rowsplits)const{ + + namespace p = boost::python; + namespace np = boost::python::numpy; + + auto shape = makeNumpyShape(); + T * data_ptr = data(); + + np::ndarray dataarr = STLToNumpy(data_ptr, shape, size(), true); + if(pad_rowsplits){ + auto rsp = padRowsplits(); + np::ndarray rowsplits = STLToNumpy(&(rsp[0]), {(int)rsp.size()}, rsp.size(), true); + return p::make_tuple(dataarr,rowsplits); + } + np::ndarray rowsplits = STLToNumpy(&(rowsplits_[0]), {(int)rowsplits_.size()}, rowsplits_.size(), true); + return p::make_tuple(dataarr,rowsplits); + +} + +template +void simpleArray::setFeatureNamesPy(boost::python::list l){ + std::vector names = toSTLVector(l); + setFeatureNames(names); +} +template +boost::python::list simpleArray::featureNamesPy(){ + boost::python::list l; + for(const auto& v:featureNames()) + l.append(v); + return l; +} + + + +typedef simpleArray simpleArray_float32; +typedef simpleArray simpleArray_int32; + + + +}//namespace + +#endif /* DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_SIMPLEARRAY_H_ */ diff --git a/compiled/interface/simpleArrayFiller.h b/compiled/interface/simpleArrayFiller.h new file mode 100644 index 0000000..ce232ff --- /dev/null +++ b/compiled/interface/simpleArrayFiller.h @@ -0,0 +1,105 @@ +/* + * simpleArrayFiller.h + * + * Created on: 16 Mar 2021 + * Author: jkiesele + */ + +#ifndef DEEPJETCORE_COMPILED_INTERFACE_SIMPLEARRAYFILLER_H_ +#define DEEPJETCORE_COMPILED_INTERFACE_SIMPLEARRAYFILLER_H_ + +#include "simpleArray.h" +#include + +namespace djc{ + +class trainDataFileStreamer; +class simpleArrayFiller{ + friend class trainDataFileStreamer; +public: + + enum dataUsage {feature_data, truth_data, weight_data}; + + ~simpleArrayFiller(){ + clear(); + } + + /** + * the shape does not include the 'event' dimension + */ + simpleArrayFiller( + const std::string name, + const std::vector& shape, + simpleArrayBase::dtypes dtype, + dataUsage dusage, + bool isragged, + const std::vector& featurenames=std::vector()); + + //maybe replace that with direct 'set' access. 
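+/*
+ * Sketch of the intended access pattern for this filler (values illustrative;
+ * the full event-loop example lives in trainDataFileStreamer.h):
+ *
+ *   filler->arr().set(0, 1.f);   // write into the current per-object array
+ *   filler->fill();              // store it and start a fresh current array
+ */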
TBI + inline simpleArrayBase & arr(){if(current_) return *current_; else throw std::logic_error("simpleArrayStreamer::arr: no array initialized.");} + inline const simpleArrayBase & arr()const{if(current_) return *current_; else throw std::logic_error("simpleArrayStreamer::arr: no array initialized.");} + + void fill(){ + arrays_.push_back(current_); + newCurrentArray(); + } + + + simpleArrayBase * copyToFullArray()const; + + //TBI + // tensor moveToTFTensor(); + +private: + + void fillEvent(); + void clear(); + void clearData(); + + simpleArrayFiller(){} + + template + simpleArrayBase * priv_copyToFullArray()const{ + std::vector newshape; + if(isragged_) + newshape = {(int)rowsplits_.size()-1,-1}; //second dimension is the variable one + else + newshape = {(int)arrays_.size()}; + //add the actual 'per event' shape + newshape.insert(newshape.end(), prototype_->shape().begin(),prototype_->shape().end()); + T * outp = 0; + if(isragged_) + outp = new T(newshape,rowsplits_); + else + outp = new T(newshape); + outp->setName(prototype_->name()); + outp->setFeatureNames(prototype_->featureNames()); + size_t counter=0; + for(const auto& a:arrays_){ + for(size_t i=0;isize();i++){ + outp->data()[counter] = dynamic_cast(a)->data()[i]; + counter++; + } + } + return outp; + } + + //this is not exact but good enough for approx buffering + size_t memSizeKB()const; + + void newCurrentArray(); + + //needs to be pointers because of types + std::vector arrays_; + std::vector rowsplits_; + simpleArrayBase* current_; + simpleArrayBase* prototype_; + dataUsage dusage_; + bool isragged_; +}; + +}//djc + + + +#endif /* DEEPJETCORE_COMPILED_INTERFACE_SIMPLEARRAYFILLER_H_ */ diff --git a/compiled/interface/trainData.h b/compiled/interface/trainData.h new file mode 100644 index 0000000..a7d3404 --- /dev/null +++ b/compiled/interface/trainData.h @@ -0,0 +1,440 @@ +/* + * trainDataInterface.h + * + * Created on: 5 Nov 2019 + * Author: jkiesele + */ + +#ifndef DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAINTERFACE_H_ +#define DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAINTERFACE_H_ + +//#define DJC_DATASTRUCTURE_PYTHON_BINDINGS//DEBUG + +#include +#include "boost/python/numpy.hpp" +#include "boost/python/list.hpp" +#include +#include "helper.h" + +#include "simpleArray.h" +#include +#include "IO.h" + +#include + +namespace djc{ + +/* + * use small helper class to store simpleArrayBase pointers + * and manage ownership where needed. 
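+ * (the class below keeps separate float32 and int32 vectors plus an ordered
+ *  list of (type, index) pairs, so at(i) returns the stored arrays in
+ *  insertion order regardless of their dtype)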
+ * just wrap around std::vector + */ +class typeContainer{ +public: + + void push_back(simpleArrayBase& a); + void move_back(simpleArrayBase& a); + + bool operator==(const typeContainer& rhs)const; + bool operator!=(const typeContainer& rhs)const{ + return !(*this==rhs); + } + + + simpleArrayBase& at(size_t idx); + const simpleArrayBase& at(size_t idx)const; + + simpleArrayBase::dtypes dtype(size_t idx)const{return at(idx).dtype();} + + simpleArray_float32& at_asfloat32(size_t idx); + const simpleArray_float32& at_asfloat32(size_t idx)const; + simpleArray_int32& at_asint32(size_t idx); + const simpleArray_int32& at_asint32(size_t idx)const; + + void clear(); + + size_t size()const{return sorting_.size();} + + + void writeToFile(FILE *&) const; + inline void readFromFile(FILE *&f){ + readFromFile_priv(f,false); + } + + inline void readMetaDataFromFile(FILE *&f){//produces size 0 arrays with correct dtypes and shapes otherwise + readFromFile_priv(f,true); + } + +private: + void readFromFile_priv(FILE *& f, bool justmetadata); + + std::vector farrs_; + std::vector iarrs_; + + enum typesorting{isfloat,isint}; + std::vector > sorting_; + +}; + + +/* + * The idea is to make this a fixed size array class, that is filled with data and then written out once full. + * a truncate function will allow to truncate arrays at a given position. + * This is memory intense, but can be written out in small pieces and then merged + * + * No checks on the first dimension because of possibly ragged arrays + */ + +class trainData{ +public: + + + + bool operator==(const trainData& rhs)const; + bool operator!=(const trainData& rhs)const{ + return !(*this==rhs); + } + //takes ownership + //these need to be separated by input type because python does not allow for overload + //but then the py interface can be made generic to accept differnt types + + //make this a base reference and then check for dtype and cast + // + int storeFeatureArray( simpleArrayBase&); + int storeTruthArray( simpleArrayBase&); + int storeWeightArray( simpleArrayBase&); + + //for python, no implicit cast + inline int storeFeatureArray( simpleArray_float32& a){ + return storeFeatureArray(dynamic_cast (a)); + } + inline int storeTruthArray( simpleArray_float32& a){ + return storeTruthArray(dynamic_cast (a)); + } + inline int storeWeightArray( simpleArray_float32& a){ + return storeWeightArray(dynamic_cast (a)); + } + + inline int storeFeatureArray( simpleArray_int32&a){ + return storeFeatureArray(dynamic_cast (a)); + } + inline int storeTruthArray( simpleArray_int32& a){ + return storeTruthArray(dynamic_cast (a)); + } + inline int storeWeightArray( simpleArray_int32& a){ + return storeWeightArray(dynamic_cast (a)); + } + + //these are not really used so much --> + /* + * This class actually doesn't really need data operations. 
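+ * (illustrative use of the store/access pair in the class below, with made-up
+ *  names td and feat: storeFeatureArray() takes ownership of the passed array
+ *  and featureArray(idx) hands back a simpleArrayBase reference, e.g.
+ *    simpleArray_float32 feat(...);            // filled elsewhere
+ *    int idx = td.storeFeatureArray(feat);     // feat is absorbed by td
+ *    const simpleArrayBase& f = td.featureArray(idx);
+ *  )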
so it can implement + * only simpleArrayBase calls + * + * + */ + + const simpleArrayBase & featureArray(size_t idx) const { + return feature_arrays_.at(idx); + } + + const simpleArrayBase & truthArray(size_t idx) const { + return truth_arrays_.at(idx); + } + + const simpleArrayBase & weightArray(size_t idx) const { + return weight_arrays_.at(idx); + } + + simpleArrayBase & featureArray(size_t idx) { + return feature_arrays_.at(idx); + } + + simpleArrayBase & truthArray(size_t idx) { + return truth_arrays_.at(idx); + } + + simpleArrayBase & weightArray(size_t idx) { + return weight_arrays_.at(idx); + } + + //<--- + + int nFeatureArrays()const{return feature_arrays_.size();} + int nTruthArrays()const{return truth_arrays_.size();} + int nWeightArrays()const{return weight_arrays_.size();} + + /* + * truncate all along first axis + */ + void truncate(size_t position); + + /* + * append along first axis + */ + void append(const trainData& ); + + /* + * split along first axis + * Returns the second part, leaves the first. + */ + trainData split(size_t splitindex); + trainData getSlice(size_t splitindex_begin, size_t splitindex_end)const; + + trainData shuffle(const std::vector& shuffle_idxs)const; + + bool validSlice(size_t splitindex_begin, size_t splitindex_end)const ; + + /* + * + */ + size_t nElements()const{ + if(feature_shapes_.size() && feature_shapes_.at(0).size()) + return feature_shapes_.at(0).at(0); + else + return 0; + } + + int nTotalElements()const{ + if(feature_shapes_.size() && feature_shapes_.at(0).size()){ + int ntotalelems=0; + for(size_t i=0;i< feature_shapes_.at(0).size(); i++){ + ntotalelems = feature_shapes_.at(0).at(i); + if(i>0 && ntotalelems<0) + return std::abs(ntotalelems); + else if(i>0) + return feature_shapes_.at(0).at(0); + } + } + else + return 0; + return 0; + } + + const std::vector > & featureShapes()const{return feature_shapes_;} + const std::vector > & truthShapes()const{return truth_shapes_;} + const std::vector > & weightShapes()const{return weight_shapes_;} + + void writeToFile(std::string filename)const; + void addToFile(std::string filename)const; + + void addToFileP(FILE *& f)const; + + void readFromFile(std::string filename){ + priv_readFromFile(filename,false); + } + void readFromFileBuffered(std::string filename){ + priv_readFromFile(filename,true); + } + + //could use a readshape or something! 
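+/*
+ * Sketch of the metadata-only path declared just below (file name illustrative):
+ * readMetaDataFromFile() fills the shape and dtype bookkeeping with size-0
+ * arrays, which is enough to derive input shapes without loading the payload.
+ *
+ *   trainData td;
+ *   td.readMetaDataFromFile("train_0.djctd");
+ *   auto shapes = td.featureShapes();   // per-array shapes, no data read
+ */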
+ void readMetaDataFromFile(const std::string& filename); + + std::vector getFirstRowsplits()const; + std::vector readShapesAndRowSplitsFromFile(const std::string& filename, bool checkConsistency=true); + + void clear(); + + trainData copy()const {return *this;} + //from python + void skim(size_t batchelement); + + + + inline boost::python::list getNumpyFeatureShapes()const{ + return transferShapesToPyList(feature_shapes_); + } + inline boost::python::list getNumpyTruthShapes()const{ + return transferShapesToPyList(truth_shapes_); + } + inline boost::python::list getNumpyWeightShapes()const{ + return transferShapesToPyList(weight_shapes_); + } + + inline boost::python::list getNumpyFeatureDTypes()const{ + return transferDTypesToPyList(feature_arrays_); + } + inline boost::python::list getNumpyTruthDTypes()const{ + return transferDTypesToPyList(truth_arrays_); + } + inline boost::python::list getNumpyWeightDTypes()const{ + return transferDTypesToPyList(weight_arrays_); + } + + inline boost::python::list getNumpyFeatureArrayNames()const{ + return transferNamesToPyList(feature_arrays_); + } + inline boost::python::list getNumpyTruthArrayNames()const{ + return transferNamesToPyList(truth_arrays_); + } + inline boost::python::list getNumpyWeightArrayNames()const{ + return transferNamesToPyList(weight_arrays_); + } + + //has ragged support + boost::python::list transferFeatureListToNumpy(bool padrowsplits=false); + + //has ragged support + boost::python::list transferTruthListToNumpy(bool padrowsplits=false); + + //no ragged support + boost::python::list transferWeightListToNumpy(bool padrowsplits=false); + + + boost::python::list getTruthRaggedFlags()const; + + /* + * the following ones can be improved w.r.t. performance + */ + + + //has ragged support + boost::python::list copyFeatureListToNumpy(bool padrowsplits=false){ + auto td = *this; + return td.transferFeatureListToNumpy(padrowsplits); //fast hack + } + + //has ragged support + boost::python::list copyTruthListToNumpy(bool padrowsplits=false){ + auto td = *this; + return td.transferTruthListToNumpy(padrowsplits); //fast hack + } + + //no ragged support + boost::python::list copyWeightListToNumpy(bool padrowsplits=false){ + auto td = *this; + return td.transferWeightListToNumpy(padrowsplits); //fast hack + } + + +private: + + void priv_readFromFile(std::string filename, bool memcp); + + trainData priv_readFromFileP(FILE *& f, const std::string& filename)const; + void priv_readSelfFromFileP(FILE *& f, const std::string& filename); + + void checkFile(FILE *& f, const std::string& filename="")const; + + + void readRowSplitArray(FILE *&, std::vector &rs, bool check)const; + + std::vector > getShapes(const typeContainer& a)const; + + template + void writeNested(const std::vector >& v, FILE *&)const; + template + void readNested( std::vector >& v, FILE *&)const; + + void updateShapes(); + + boost::python::list transferNamesToPyList(const typeContainer&)const; + boost::python::list transferShapesToPyList(const std::vector >&)const; + boost::python::list transferDTypesToPyList(const typeContainer&)const; + + + typeContainer feature_arrays_; + typeContainer truth_arrays_; + typeContainer weight_arrays_; + + std::vector > feature_shapes_; + std::vector > truth_shapes_; + std::vector > weight_shapes_; + + + boost::python::list transferToNumpyList(typeContainer& , bool pad_rowsplits); + + +}; + + + + +/* + * append along first axis + */ + +/* + * split along first axis + * Returns the first part, leaves the second. 
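+ *
+ * (illustrative: the related getSlice(b, e) declared in the class above copies
+ *  roughly the first-axis range [b, e) into a new trainData and leaves the
+ *  original untouched, whereas split() modifies the source in place)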
+ * + * Can use some performance improvements + */ + + + + + + + + + + + +template +void trainData::writeNested(const std::vector >& v, FILE *& ofile)const{ + + size_t size = v.size(); + io::writeToFile(&size, ofile); + for(size_t i=0;i +void trainData::readNested(std::vector >& v, FILE *& ifile)const{ + + size_t size = 0; + io::readFromFile(&size, ifile); + v.resize(size,std::vector(0)); + for(size_t i=0;i c_arrays (also pointers to first vec element), vector< vector > shapes, (opt) vector< vector > row_splits, filename) + * + * readFromFile + * + */ + +/* + * + * Make a write CPP interface that does not need boost whatsoever! + * Then wrap it for python-numpy bindings externally + * + * + */ + +/* + * uncompressed header with all shape infos + * compressed x,y,w lists or arrays? + * + * + */ + +}//namespace + +#endif /* DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAINTERFACE_H_ */ diff --git a/compiled/interface/trainDataFileStreamer.h b/compiled/interface/trainDataFileStreamer.h new file mode 100644 index 0000000..f538e6e --- /dev/null +++ b/compiled/interface/trainDataFileStreamer.h @@ -0,0 +1,173 @@ +/* + * trainDataFileStreamer.h + * + * Created on: 15 Mar 2021 + * Author: jkiesele + */ + +#ifndef DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAFILESTREAMER_H_ +#define DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAFILESTREAMER_H_ + +#include "simpleArray.h" +#include "simpleArrayFiller.h" +#include +#include +/* + * general idea: just manage access to simpleArrays. + * Once buffer full, create trainData and write out + * (make sure this can be threaded) + * + * + */ + +//helper, might be moved +/* + * vector of simplearray + * vector of row splits created when calling next + * + * return simpleArrayBase (typeless) + */ +namespace djc{ + + + +/** + * + * Usage: + * - create the trainDataFileStreamer object (sets output file name and opt. buffer size). + * - add arrays to it + * - fill the arrays in the event loop (using arr()->set(i,j,k,l,value) . 
can have one ragged dimension + * - finish the event synchronously for all arrays calling fillEvent + * + * + * Example pseudo code: + * + * + * + * trainDataFileStreamer fs("outfile.djctd"); + * auto features = fs.add("myfeatures", // just a name, can also be left blank + * {3}, // the shape, here just 3 features + * simpleArrayBase::float32, // the data type + * simpleArrayFiller::feature_data, // what it's used for + * true, // data is ragged (variable 1st dimension) + * {"jetpt","jeteta","jetphi"}); // optional feature names + * + * + * auto zeropadded = fs.add("myzeropadded_lepton_features",// just a name, can also be left blank + * {5,3}, // 3 features each for the first 5 leptons + * simpleArrayBase::float32, // the data type + * simpleArrayFiller::feature_data, // what it's used for + * false, // data is not ragged + * {"pt","eta","phi"}); // optional feature names + * + * //add a non ragged per-event variable + * auto truth = fs.add("isSignal",{1},simpleArrayBase::int32,simpleArrayFiller::truth_data, false); + * + * for(event: events){ + * + * for(jet: jets){ + * features->arr().set(0, jet->pt()); + * features->arr().set(1, jet->eta()); + * features->arr().set(2, jet->phi()); + * features->fill() + * } + * + * + * zeropadded->arr().fillZero(); //make sure everything is initialized with zeros + * for(size_t i=0;iarr().set(i,0,leptons.at(i).pt()); + * zeropadded->arr().set(i,1,leptons.at(i).eta()); + * zeropadded->arr().set(i,2,leptons.at(i).phi()); + * if(i>3) + * break; + * } + * zeropadded->fill(); + * + * truth->arr().set(0, isSUSYevent); + * truth->fill() + * + * fs.fillEvent(); + * } + * + * //no need to explicitly write out/close etc. action is implemented in the destructor + * + */ +class trainDataFileStreamer { +public: + + trainDataFileStreamer( + const std::string & filename, + float bufferInMB=20); + + ~trainDataFileStreamer(){ + writeBuffer(true);//write remaining items + for(auto& a:arraystreamers_a_) + delete a; + for(auto& a:arraystreamers_b_) + delete a; + } + + simpleArrayFiller* add(const std::string& name, + const std::vector& shape, + simpleArrayBase::dtypes type, + simpleArrayFiller::dataUsage dusage, + bool isragged, + const std::vector& featurenames=std::vector()){ + simpleArrayFiller* as = new simpleArrayFiller(name,shape,type,dusage,isragged,featurenames); + activestreamers_->push_back(as); + return as; + } + + + + inline void fillEvent(){ + //makes sure it's in sync + for(auto& a:*activestreamers_) + a->fillEvent(); + if(bufferFull()) + writeBuffer(); + } + + +private: + + void writeBuffer(bool sync=false); + bool bufferFull(); + + + std::vector arraystreamers_a_; + std::vector arraystreamers_b_; + std::vector * activestreamers_; + std::vector * writingstreamers_; + std::string filename_; + float buffermb_; + +}; + + +namespace test{ + +void testTrainDataFileStreamer(); + +} + + +/* + * + * + * + * + * + * + * + * + * + * + * + * + */ + + +}//djc + +#endif /* DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAFILESTREAMER_H_ */ diff --git a/compiled/interface/trainDataGenerator.h b/compiled/interface/trainDataGenerator.h new file mode 100644 index 0000000..6bbb03c --- /dev/null +++ b/compiled/interface/trainDataGenerator.h @@ -0,0 +1,156 @@ +/* + * trainDataGenerator.h + * + * Created on: 7 Nov 2019 + * Author: jkiesele + */ + +#ifndef DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAGENERATOR_H_ +#define DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAGENERATOR_H_ + +#include +#include "boost/python/numpy.hpp" +#include "boost/python/list.hpp" +#include +#include 
"helper.h" +#include "pythonToSTL.h" + +#include +#include +#include "trainData.h" +#include +#include +#include +#include +#include + +namespace djc{ + +/* + * Base class, no numpy interface or anything yet. + * Inherit from/use this class and define the actual batch feed function. + * This could as well be filling a (ragged) tensorflow tensor + * + * + * Notes for future improvements: + * + * - pre-split trainData in buffer (just make it a vector/fifo-like queue) + * propagates to trainData, simpleArray, then make multiple memcpy (even threaded?) + * (but not read - it is still a split!) + * This makes the second thread obsolete, and still everything way faster! + * + * - for ragged: instead of batch size, set upper limit on data size (number of floats) + * can be used to pre-split in a similar way + * + * + * + * + */ + +class trainDataGenerator{ +public: + trainDataGenerator(); + ~trainDataGenerator(); + + /** + * Also opens all files (verify) and gets the total sample size + */ + void setFileList(const std::vector& files){ + clear(); + orig_infiles_=files; + readInfo(); + } + + + void setFileListPy(boost::python::list files); + + void setBuffer(const trainData&); + + void setBatchSize(size_t nelements){ + batchsize_= nelements; + if(orig_rowsplits_.size()) + prepareSplitting(); + } + void setSquaredElementsLimit(bool use_sq_limit){ + sqelementslimit_=use_sq_limit; + if(orig_rowsplits_.size()) + prepareSplitting(); + } + void setSkipTooLargeBatches(bool skipthem){ + skiplargebatches_=skipthem; + if(orig_rowsplits_.size()) + prepareSplitting(); + } + + int getNTotal()const{return ntotal_;} + + void setFileTimeout(size_t seconds){ + filetimeout_=seconds; + } + + int getNBatches()const{return nbatches_;} + + bool lastBatch()const; + + bool isEmpty()const; + + void prepareNextEpoch(); + + void shuffleFileList(); + + void end(); + /** + * clears all dataset related info but keeps batch size, file timout etc + */ + void clear(); + + /** + * gets Batch. If batchsize is specified, it is up to the user + * to make sure that the sum of all batches is smaller or equal the + * total sample size. + * The batch size is always the size of the NEXT batch! + * + */ + trainData getBatch(); //if no threading batch index can be given? just for future? 
+ + int debuglevel; + + + +private: + void readBuffer(); + void readInfo(); + std::vector subShuffleRowSplits(const std::vector& thisrs, + const std::vector& s_idx)const; + void prepareSplitting(); + bool tdHasRaggedDimension(const trainData& )const; + + trainData prepareBatch(); + std::vector orig_infiles_; + std::vector shuffle_indices_; + std::vector > sub_shuffle_indices_; + std::vector > orig_rowsplits_; + std::vector splits_; + std::vector usebatch_; + int randomcount_; + size_t batchsize_; + bool sqelementslimit_,skiplargebatches_; + + trainData buffer_store, buffer_read; + std::thread * readthread_; + std::string nextread_; + size_t nextreadIdx_; + size_t filecount_; + size_t nbatches_; + size_t npossiblebatches_; + size_t ntotal_; + size_t nsamplesprocessed_; + size_t lastbatchsize_; + size_t filetimeout_; + size_t batchcount_; + size_t lastbuffersplit_; +}; + + +}//namespace +#endif /* DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_TRAINDATAGENERATOR_H_ */ diff --git a/compiled/interface/version.h b/compiled/interface/version.h new file mode 100644 index 0000000..fff3d05 --- /dev/null +++ b/compiled/interface/version.h @@ -0,0 +1,21 @@ +/* + * version.h + * + * Created on: 6 Nov 2019 + * Author: jkiesele + */ + +#ifndef DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_VERSION_H_ +#define DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_VERSION_H_ + +#define DJCDATAVERSION (2.1f) +#define DJCDATAVERSION_COMPAT (2.0f) + +bool checkVersionCompatible(const float& version); + +inline bool checkVersionStrict(float version){ + return version == DJCDATAVERSION; +} + + +#endif /* DJCDEV_DEEPJETCORE_COMPILED_INTERFACE_VERSION_H_ */ diff --git a/compiled/src/IO.cpp b/compiled/src/IO.cpp new file mode 100644 index 0000000..962acb4 --- /dev/null +++ b/compiled/src/IO.cpp @@ -0,0 +1,46 @@ +#include "../interface/IO.h" + + +namespace djc{ +namespace io{ + +template <> +void writeToFile(const std::string * p, FILE * ofile, size_t N, size_t Nbytes){ + N=p->length(); + Nbytes = N*sizeof(char); + writeToFile(&N,ofile); + if(!N) + return; + size_t ret = fwrite(p->data(), 1, Nbytes, ofile); + if(ret != Nbytes){ + std::string fname = followFileName(ofile); + fclose(ofile); + throw std::runtime_error("djc::io::writeToFile: writing to file "+fname+" not successful"); + } +} + + +template <> +void readFromFile(std::string * p, FILE* ifile, size_t N, size_t Nbytes){ + + readFromFile(&N,ifile); + if(!N){ + *p=""; + return; + } + char * c = new char[N]; + + Nbytes = N* sizeof(char); + size_t ret = fread(c, 1, Nbytes, ifile); + *p = std::string(c,N); + delete[] c; + + if(ret != Nbytes){ + std::string fname = followFileName(ifile); + fclose(ifile); + throw std::runtime_error("djc::io::readFromFile:reading from file "+fname+" not successful"); + } +} + +}//ns +}//ns diff --git a/compiled/src/LinkDef.h b/compiled/src/LinkDef.h new file mode 100644 index 0000000..db472e4 --- /dev/null +++ b/compiled/src/LinkDef.h @@ -0,0 +1,21 @@ +/* + * classes.h + * + * Created on: 30 Apr 2019 + * Author: jkiesele + */ + +#ifndef BIN_LINKDEF_H_ +#define BIN_LINKDEF_H_ + +#ifdef __ROOTCLING__ +#include + +#pragma link C++ class std::vector +; +#pragma link C++ class std::vector > +; +#pragma link C++ class std::vector > > +; +#pragma link C++ class std::vector > > > +; + +#endif + +#endif /* BIN_LINKDEF_H_ */ diff --git a/compiled/src/c_arrayReads.C b/compiled/src/c_arrayReads.C new file mode 100644 index 0000000..6c17689 --- /dev/null +++ b/compiled/src/c_arrayReads.C @@ -0,0 +1,237 @@ + +#define BOOST_PYTHON_MAX_ARITY 20 +#include +#include 
"boost/python/extract.hpp" +#include "boost/python/numpy.hpp" +#include "boost/python/list.hpp" +#include "boost/python/str.hpp" +//#include "boost/filesystem.hpp" +#include +#include +#include "TString.h" +#include +#include +#include "TFile.h" +#include "TTree.h" +#include +#include +#include "TStopwatch.h" +#include "../interface/indata.h" +#include "../interface/pythonToSTL.h" +#include "../interface/helper.h" +#include + + +using namespace boost::python; //for some reason.... + + + +void read2DArray(boost::python::numpy::ndarray numpyarray, + std::string filename_std, + std::string treename_std, + std::string branchname_std, + int rebinx, + int rebiny, + bool zeropad, + bool x_cutoff, + boost::python::numpy::ndarray x_ncut + ) { + + + TFile * tfile = new TFile(filename_std.data(),"READ"); + checkTObject(tfile,"read2DArray: input file problem"); + + TTree* tree=(TTree*)tfile->Get(treename_std.data()); + checkTObject(tree,"read2DArray: input tree problem"); + + + int nentries = (int) boost::python::len(numpyarray); + int nx=0; + if(nentries) + nx = (int) boost::python::len(numpyarray[0]); + int ny=0; + if(nx) + ny = (int) boost::python::len(numpyarray[0][0]); + + if(!nentries || nentries != tree->GetEntries()){ + std::cerr << "read2DArray: tree/array entries don't match" << std::endl; + throw std::runtime_error("read2DArray: tree/array entries don't match"); + } + + + std::vector > *inarr = 0; + tree->SetBranchAddress(branchname_std.data(),&inarr); + + tree->GetEntry(0); + + if(!zeropad && (nx*rebinx!=(int)inarr->size() || ny*rebiny!=(int)inarr->at(0).size())){ + std::cerr << "read2DArray: tree/array dimensions don't match" << std::endl; + throw std::runtime_error("read2DArray: tree/array dimensions don't match"); + } + + int npe=0; + for(int e=0;eGetEntry(e); + if(inarr->size() > nx){ + if(x_cutoff){ + x_ncut[0]+=1; + continue;} + else throw std::out_of_range("read2DArray: x ([:,x,...]) out of range"); + } + for(size_t x=0;xsize();x++){ + int npx = (int)x/rebinx; + for(size_t y=0;yat(x).size();y++){ + int npy = (int)y/rebiny; + numpyarray[npe][npx][npy][0] += inarr->at(x)[y]; + } + } + npe++; + } + tfile->Close(); + delete tfile; +} + + +void read3DArray(boost::python::numpy::ndarray numpyarray, + std::string filename_std, + std::string treename_std, + std::string branchname_std, + int rebinx=1, + int rebiny=1, + int rebinz=1, + bool zeropad=false) { + + + TFile * tfile = new TFile(filename_std.data(),"READ"); + checkTObject(tfile,"read2DArray: input file problem"); + + TTree* tree=(TTree*)tfile->Get(treename_std.data()); + checkTObject(tree,"read2DArray: input tree problem"); + + + int nentries = (int) boost::python::len(numpyarray); + int nx=0; + if(nentries) + nx = (int) boost::python::len(numpyarray[0]); + int ny=0; + if(nx) + ny = (int) boost::python::len(numpyarray[0][0]); + int nz=0; + if(ny) + nz = (int) boost::python::len(numpyarray[0][0][0]); + + if(!nentries || nentries != tree->GetEntries()){ + std::cerr << "read3DArray: tree/array entries don't match" << std::endl; + throw std::runtime_error("read3DArray: tree/array entries don't match"); + } + + + std::vector > > * inarr = 0; + tree->SetBranchAddress(branchname_std.data(),&inarr); + + tree->GetEntry(0); + + if(!zeropad && (nx*rebinx!=(int)inarr->size() || ny*rebiny!=(int)inarr->at(0).size() || nz*rebinz!=(int)inarr->at(0).at(0).size())){ + std::cerr << "read3DArray: tree/array dimensions don't match" << std::endl; + throw std::runtime_error("read3DArray: tree/array dimensions don't match"); + } + + for(int 
e=0;eGetEntry(e); + for(size_t x=0;xsize();x++){ + int npx = (int)x/rebinx; + for(size_t y=0;yat(x).size();y++){ + int npy = (int)y/rebiny; + for(size_t z=0;zat(x)[y].size();z++){ + int npz = (int)z/rebinz; + numpyarray[e][npx][npy][npz][0] += inarr->at(x)[y][z]; + } + } + } + } + tfile->Close(); + delete tfile; +} + + +void read4DArray(boost::python::numpy::ndarray numpyarray, + std::string filename_std, + std::string treename_std, + std::string branchname_std, + int rebinx=1, + int rebiny=1, + int rebinz=1, + int rebinf=1, + bool zeropad=false) { + + + TFile * tfile = new TFile(filename_std.data(),"READ"); + checkTObject(tfile,"read2DArray: input file problem"); + + TTree* tree=(TTree*)tfile->Get(treename_std.data()); + checkTObject(tree,"read2DArray: input tree problem"); + + + int nentries = (int) boost::python::len(numpyarray); + int nx=0; + if(nentries) + nx = (int) boost::python::len(numpyarray[0]); + int ny=0; + if(nx) + ny = (int) boost::python::len(numpyarray[0][0]); + int nz=0; + if(ny) + nz = (int) boost::python::len(numpyarray[0][0][0]); + int nf=0; + if(nz) + nf = (int) boost::python::len(numpyarray[0][0][0][0]); + + if(!nentries || nentries != tree->GetEntries()){ + std::cerr << "read4DArray: tree/array entries don't match" << std::endl; + throw std::runtime_error("read4DArray: tree/array entries don't match"); + } + + std::vector > > > * inarr = 0; + tree->SetBranchAddress(branchname_std.data(),&inarr); + + tree->GetEntry(0); + + if(!zeropad && (nx*rebinx!=(int)inarr->size() || ny*rebiny!=(int)inarr->at(0).size() || nz*rebinz!=(int)inarr->at(0).at(0).size() + || nf*rebinf!=(int)inarr->at(0).at(0).at(0).size())){ + std::cout << "nx*rebinx "<size()<<'\n'; + std::cout << "ny*rebiny "<at(0).size()<<'\n'; + std::cout << "nz*rebinz "<at(0).at(0).size()<<'\n'; + std::cout << "nf*rebinf "<at(0).at(0).at(0).size()<<'\n'; + throw std::runtime_error("read4DArray: tree/array dimensions don't match"); + } + for(int e=0;eGetEntry(e); + for(size_t x=0;xsize();x++){ + int npx = (int)x/rebinx; + for(size_t y=0;yat(x).size();y++){ + int npy = (int)y/rebiny; + for(size_t z=0;zat(x)[y].size();z++){ + int npz = (int)z/rebinz; + for(size_t f=0;fat(x)[y][z].size();f++){ + int npf = (int)f/rebinf; + // std::cout << e <<", "<< npx <<", "<< npy <<", "<< npz <<", "<< npf << ": "<< f<< std::endl; + numpyarray[e][npx][npy][npz][npf][0] += inarr->at(x)[y][z][f]; + } + } + } + } + } + tfile->Close(); + delete tfile; +} + + +// Expose classes and methods to Python +BOOST_PYTHON_MODULE(c_arrayReads) { + + boost::python::numpy::initialize(); + def("read2DArray", &read2DArray); + def("read3DArray", &read3DArray); + def("read4DArray", &read4DArray); +} + diff --git a/compiled/src/c_makePlots.C b/compiled/src/c_makePlots.C index 7c942fe..b95d316 100644 --- a/compiled/src/c_makePlots.C +++ b/compiled/src/c_makePlots.C @@ -2,7 +2,7 @@ #define BOOST_PYTHON_MAX_ARITY 20 #include #include "boost/python/extract.hpp" -#include "boost/python/numeric.hpp" +#include "boost/python/numpy.hpp" #include "boost/python/list.hpp" #include "boost/python/str.hpp" //#include "boost/filesystem.hpp" @@ -13,6 +13,7 @@ #include #include #include "../interface/pythonToSTL.h" +#include "../interface/helper.h" #include "friendTreeInjector.h" #include "TROOT.h" #include "colorToTColor.h" @@ -23,6 +24,7 @@ #include "TFile.h" #include "TStyle.h" #include +#include "TEfficiency.h" using namespace boost::python; //for some reason.... 
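+// (Note on the hunks below: makeEffPlots now builds a ROOT TEfficiency from the
+//  numerator and denominator histograms instead of dividing bin contents by hand,
+//  draws it on a dedicated axis histogram, and takes an explicit nbins binning in
+//  place of the old rebinfactor argument.)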
@@ -31,6 +33,15 @@ static void mergeOverflow(TH1F*h){ h->SetBinContent(1,h->GetBinContent(1)+h->GetBinContent(0)); } +void setStyles(TH1* h){ + h->GetXaxis()->SetTitleSize(0.05); + h->GetXaxis()->SetTitleOffset(0.0); + h->GetXaxis()->SetLabelSize(0.05); + h->GetYaxis()->SetTitleSize(0.05); + h->GetYaxis()->SetTitleOffset(1.0); + h->GetYaxis()->SetLabelSize(0.05); +} + void makePlots( const boost::python::list intextfiles, @@ -200,7 +211,10 @@ void makePlots( allhistos.at(0)->Draw("AXIS"); for(size_t i=0;iDraw("same,hist"); + if(makeWidthProfile) + allhistos.at(i)->Draw("same,e2"); + else + allhistos.at(i)->Draw("same,hist"); } leg->Draw("same"); @@ -223,7 +237,7 @@ void makeEffPlots( std::string outfile, std::string xaxis, std::string yaxis, - int rebinfactor, + int nbins, bool setLogY, float Xmin, float Xmax, @@ -298,7 +312,15 @@ void makeEffPlots( injector.createChain(); TChain* c=injector.getChain(); - std::vector allhistos; + + TString tfileout=toutfile; + tfileout=tfileout(0,tfileout.Length()-4); + tfileout+=".root"; + + TFile * f = new TFile(tfileout,"RECREATE"); + TCanvas cv("plots"); + std::vector allhistos; + TH1F * axishisto=new TH1F("AXIS","AXIS",nbins,Xmin,Xmax); TLegend * leg=new TLegend(0.2,0.75,0.8,0.88); leg->SetBorderSize(0); @@ -309,11 +331,6 @@ void makeEffPlots( float max=-1e100; float min=1e100; - TString tfileout=toutfile; - tfileout=tfileout(0,tfileout.Length()-4); - tfileout+=".root"; - - TFile * f = new TFile(tfileout,"RECREATE"); gStyle->SetOptStat(0); for(size_t i=0;iDraw(s_vars.at(i)+">>"+tmpname,numcuts,addstr); - TH1F *numhisto = (TH1F*) gROOT->FindObject(tmpname); - if(rebinfactor>1) - numhisto->Rebin(rebinfactor); - TH1F *denhisto=(TH1F *)numhisto->Clone(tmpname+"den"); + TH1F *denhisto=(TH1F *)numhisto->Clone(tmpname+"den"); c->Draw(s_vars.at(i)+">>"+tmpname+"den",s_cutsden.at(i),addstr); - for(int bin=0;bin<=numhisto->GetNbinsX();bin++){ - float denbin=denhisto->GetBinContent(bin); - if(denbin){ - numhisto->SetBinContent(bin, numhisto->GetBinContent(bin)/denbin); - } - else{ - numhisto->SetBinContent(bin,0); - } - } - TH1F *histo = numhisto; //(TH1F *)()->Clone(tmpname) ; - histo->SetLineColor(colorToTColor(s_colors.at(i))); - histo->SetLineStyle(lineToTLineStyle(s_colors.at(i))); - histo->SetTitle(s_names.at(i)); - histo->SetName(s_names.at(i)); + TEfficiency* eff = new TEfficiency(s_names.at(i),s_names.at(i),nbins,Xmin,Xmax); + //eff->SetUseWeightedEvents(true); + //eff->SetStatisticOption(TEfficiency::kFNormal); - histo->SetFillStyle(0); - histo->SetLineWidth(2); + eff->SetTotalHistogram(*denhisto,""); + eff->SetPassedHistogram(*numhisto,""); - float tmax=histo->GetMaximum(); - float tmin=histo->GetMinimum(); - if(tmax>max)max=tmax; - if(tminSetLineColor(colorToTColor(s_colors.at(i))); + eff->SetLineStyle(lineToTLineStyle(s_colors.at(i))); - histo->Write(); + + eff->SetFillStyle(0); + eff->SetLineWidth(2); + + allhistos.push_back(eff); + eff->Draw();//for some reason + eff->Write(); + + // if(i) + // delete numhisto; + + // delete denhisto; } @@ -371,19 +379,27 @@ void makeEffPlots( leg->AddEntry(allhistos.at(i-1),s_names.at(i-1),"l"); } - TCanvas cv("plots"); if(setLogY) cv.SetLogy(); - allhistos.at(0)->Draw("AXIS"); - allhistos.at(0)->GetYaxis()->SetRangeUser(min,1.3*max); //space for legend on top - allhistos.at(0)->GetXaxis()->SetTitle(xaxis.data()); - if(XminGetXaxis()->SetRangeUser(Xmin,Xmax); - allhistos.at(0)->GetYaxis()->SetTitle(yaxis.data()); + cv.SetLeftMargin(0.15); + cv.SetBottomMargin(0.15); + + axishisto->Draw("AXIS"); + // 
axishisto->SetLineColorAlpha(kBlack,0.8); + axishisto->GetYaxis()->SetRangeUser(0,1.3); //space for legend on top + if(OverrideMax>OverrideMin) + axishisto->GetYaxis()->SetRangeUser(OverrideMin,OverrideMax); + + axishisto->GetXaxis()->SetTitle(xaxis.data()); + axishisto->GetYaxis()->SetTitle(yaxis.data()); + setStyles(axishisto); + axishisto->Draw("AXIS"); - allhistos.at(0)->Draw("AXIS"); for(size_t i=0;iDraw("same,hist"); + auto ld=getLineDouble(allhistos.at(i)); + ld->Draw("same,P"); + allhistos.at(i)->Draw("same,P"); } leg->Draw("same"); diff --git a/compiled/src/c_makeROCs.C b/compiled/src/c_makeROCs.C index 5462f20..f3bbf5d 100644 --- a/compiled/src/c_makeROCs.C +++ b/compiled/src/c_makeROCs.C @@ -1,7 +1,7 @@ -#define BOOST_PYTHON_MAX_ARITY 20 +#define BOOST_PYTHON_MAX_ARITY 40 #include #include "boost/python/extract.hpp" -#include "boost/python/numeric.hpp" +#include "boost/python/numpy.hpp" #include "boost/python/list.hpp" #include "boost/python/str.hpp" //#include "boost/filesystem.hpp" @@ -14,6 +14,7 @@ #include "../interface/pythonToSTL.h" #include "friendTreeInjector.h" #include "rocCurveCollection.h" +#include using namespace boost::python; //for some reason.... @@ -36,8 +37,13 @@ void makeROCs( bool logy, bool individual, std::string xaxis, + std::string yaxis, int nbins, - std::string treename + std::string treename, + double xmin, + std::string experimentlabel,std::string lumilabel,std::string prelimlabel, + const boost::python::list yscales, + bool no_friend_tree ) { std::vector s_intextfiles=toSTLVector(intextfiles); @@ -49,6 +55,7 @@ void makeROCs( std::vector s_cuts = toSTLVector(cuts); std::vector s_invalidate =toSTLVector(invalidate); std::vector s_extralegend=toSTLVector(extralegend); + std::vector s_yscales =toSTLVector(yscales); /* * Size checks!!! */ @@ -58,7 +65,8 @@ void makeROCs( s_names.size() != s_vetos.size()|| s_names.size() != s_colors.size()|| s_names.size() != s_cuts.size() || - s_invalidate.size() != s_names.size()) + s_invalidate.size() != s_names.size() || + s_names.size() != s_yscales.size()) throw std::runtime_error("makeROCs: input lists must have same size"); //make unique list of infiles @@ -86,7 +94,21 @@ void makeROCs( friendTreeInjector injector((TString)treename); std::vector injectors(u_infiles.size(), friendTreeInjector((TString)treename)); std::vector chains(u_infiles.size()); - if(individual){ + if(no_friend_tree){ + for(size_t i=0;i> b) { + chains.at(i)->Add(b+"/"+treename); + } + } + } + else if(individual){ if(u_infiles.size() != s_names.size()) throw std::runtime_error("makeROCs: file list must have same size as legends etc. 
in individual mode"); for(size_t i=0;i #include "boost/python/extract.hpp" -#include "boost/python/numeric.hpp" +#include "boost/python/numpy.hpp" #include "boost/python/list.hpp" #include "boost/python/str.hpp" //#include "boost/filesystem.hpp" @@ -36,12 +36,12 @@ enum modeen {en_flat,en_particlewise}; // Functions to demonstrate extraction -void priv_meanNormZeroPad(boost::python::numeric::array& numpyarray, +void priv_meanNormZeroPad(boost::python::numpy::ndarray& numpyarray, std::vector<__hidden::indata> data, TFile* tfile, modeen mode); -void priv_process(boost::python::numeric::array numpyarray, +void priv_process(boost::python::numpy::ndarray numpyarray, const boost::python::list inl_norms, const boost::python::list inl_means , const boost::python::list inl_branches, @@ -56,7 +56,7 @@ void priv_process(boost::python::numeric::array numpyarray, * wrapper to create input to C++ only function * Can be generalised to doing it at the same time for many different sized branches */ -void process(boost::python::numeric::array numpyarray, +void process(boost::python::numpy::ndarray numpyarray, const boost::python::list inl_norms, const boost::python::list inl_means , const boost::python::list inl_branches, @@ -70,7 +70,7 @@ void process(boost::python::numeric::array numpyarray, } -void particlecluster(boost::python::numeric::array numpyarray, +void particlecluster(boost::python::numpy::ndarray numpyarray, const boost::python::list inl_norms, const boost::python::list inl_means , const boost::python::list inl_branches, @@ -86,7 +86,7 @@ void particlecluster(boost::python::numeric::array numpyarray, } -void priv_process(boost::python::numeric::array numpyarray, +void priv_process(boost::python::numpy::ndarray numpyarray, const boost::python::list inl_norms, const boost::python::list inl_means , const boost::python::list inl_branches, @@ -124,7 +124,7 @@ void priv_process(boost::python::numeric::array numpyarray, //root-only functions //change all inputs except for in_data to vectors for simultaneous use -void priv_meanNormZeroPad(boost::python::numeric::array& numpyarray, +void priv_meanNormZeroPad(boost::python::numpy::ndarray& numpyarray, std::vector<__hidden::indata> datacollection, TFile* tfile, modeen mode){ @@ -177,7 +177,7 @@ void priv_meanNormZeroPad(boost::python::numeric::array& numpyarray, } -void priv_particlecluster(boost::python::numeric::array& numpyarray, +void priv_particlecluster(boost::python::numpy::ndarray& numpyarray, std::vector<__hidden::indata> datacollection, TFile* tfile){ @@ -211,13 +211,13 @@ void particle_binner( std::string xbranch, std::string xcenter, int xbins, float xwidth, std::string ybranch, std::string ycenter, int ybins, float ywidth, //binned variables - boost::python::numeric::array numpyarray, + boost::python::numpy::ndarray numpyarray, const boost::python::list inl_norms, const boost::python::list inl_means , const boost::python::list inl_branches, int nmax, //summed variables - boost::python::numeric::array sum_npy_array, + boost::python::numpy::ndarray sum_npy_array, const boost::python::list sum_inl_norms, const boost::python::list sum_inl_means , const boost::python::list summed_branches @@ -353,7 +353,7 @@ void particle_binner( tfile->Close(); delete tfile; } -void priv_fillDensityMap(boost::python::numeric::array numpyarray, +void priv_fillDensityMap(boost::python::numpy::ndarray numpyarray, double norm, std::string in_branch, std::string in_weightbranch, @@ -363,7 +363,7 @@ void priv_fillDensityMap(boost::python::numeric::array numpyarray, 
double offset, bool count=false ); -void fillDensityMap(boost::python::numeric::array numpyarray, +void fillDensityMap(boost::python::numpy::ndarray numpyarray, double norm, std::string in_branch, std::string in_weightbranch, @@ -376,7 +376,7 @@ void fillDensityMap(boost::python::numeric::array numpyarray, xbranch,xcenter,xbins,xwidth, ybranch,ycenter,ybins,ywidth,offset,false); } -void fillCountMap(boost::python::numeric::array numpyarray, +void fillCountMap(boost::python::numpy::ndarray numpyarray, double norm, std::string in_weightbranch, std::string filename, std::string counter_branch, @@ -390,7 +390,7 @@ void fillCountMap(boost::python::numeric::array numpyarray, ybranch,ycenter,ybins,ywidth,offset,true); } -void fillDensityLayers(boost::python::numeric::array numpyarray, +void fillDensityLayers(boost::python::numpy::ndarray numpyarray, const boost::python::list inl_norms, const boost::python::list inl_means, const boost::python::list in_branches, @@ -550,7 +550,7 @@ void fillDensityLayers(boost::python::numeric::array numpyarray, } -void priv_fillDensityMap(boost::python::numeric::array numpyarray, +void priv_fillDensityMap(boost::python::numpy::ndarray numpyarray, double norm, std::string in_branch, std::string in_weightbranch, @@ -666,7 +666,7 @@ void doScaling(bool doit){ // Expose classes and methods to Python BOOST_PYTHON_MODULE(c_meanNormZeroPad) { - boost::python::numeric::array::set_module_and_type("numpy", "ndarray"); + boost::python::numpy::initialize(); __hidden::indata();//for some reason exposing the class prevents segfaults. garbage collector? //anyway, it doesn't hurt, just leave this here def("process", &process); diff --git a/compiled/src/c_randomSelect.C b/compiled/src/c_randomSelect.C index 890b268..e04f49b 100644 --- a/compiled/src/c_randomSelect.C +++ b/compiled/src/c_randomSelect.C @@ -10,7 +10,7 @@ #include #include "boost/python/extract.hpp" -#include "boost/python/numeric.hpp" +#include "boost/python/numpy.hpp" #include "boost/python/list.hpp" #include "boost/python/str.hpp" #include @@ -30,13 +30,13 @@ public: ~randomSelector(){ delete rand_; } - void select(boost::python::numeric::array probs , boost::python::numeric::array indices, const size_t nselect); + void select(boost::python::numpy::ndarray probs , boost::python::numpy::ndarray indices, const size_t nselect); private: TRandom3* rand_; } sel; -void randomSelector::select(boost::python::numeric::array probs, boost::python::numeric::array selects, const size_t nselect){ +void randomSelector::select(boost::python::numpy::ndarray probs, boost::python::numpy::ndarray selects, const size_t nselect){ const size_t size = len(probs); if(nselect>size){ @@ -64,8 +64,8 @@ void randomSelector::select(boost::python::numeric::array probs, boost::python: } //indices are initialised to 0, probs describe the remove probabilities -void randSelect(boost::python::numeric::array probs, - boost::python::numeric::array indices, +void randSelect(boost::python::numpy::ndarray probs, + boost::python::numpy::ndarray indices, int nentries){ sel.select(probs,indices,nentries); @@ -74,7 +74,8 @@ void randSelect(boost::python::numeric::array probs, // Expose classes and methods to Python BOOST_PYTHON_MODULE(c_randomSelect) { - boost::python::numeric::array::set_module_and_type("numpy", "ndarray"); + + boost::python::numpy::initialize(); def("randSelect", &randSelect); } diff --git a/compiled/src/c_readArrThreaded.C b/compiled/src/c_readArrThreaded.C deleted file mode 100644 index 8435fed..0000000 --- 
a/compiled/src/c_readArrThreaded.C +++ /dev/null @@ -1,326 +0,0 @@ - - -#define BOOST_PYTHON_MAX_ARITY 20 -#include -#include "boost/python/extract.hpp" -#include "boost/python/numeric.hpp" -#include "boost/python/list.hpp" -#include "boost/python/str.hpp" -#include -#include -#include "../interface/pythonToSTL.h" -#include "../interface/helper.h" - -#include - -#include - - -#include -#include - -#include "../interface/quicklz.h" - -#define MAXCHUNK (0xffffffff - 400) - -bool debug=false; - -class readThread{ -public: - readThread(long arrpointer, - const std::string& filenamein, - long size,bool rmwhendone){ - - arrbuf=(float*)(void*)arrpointer; - infile=filenamein; - length=size; - length*=sizeof(float); - pthread=new pthread_t(); - done=0; - id=lastid; - lastid++; - if(lastid>0xFFFE) - lastid=0; - removewhendone=rmwhendone; - state_decompress = new qlz_state_decompress(); - } - - ~readThread(){ - if(pthread)delete pthread; - if(state_decompress)delete state_decompress; - } - void start(){ - int iret= pthread_create( pthread, NULL, readArrThread, (void*) this); - if(iret){ - std::cerr << "Error - pthread_create() return code: "<infile <infile.data(), "rb"); - fseek(ifile, 0, SEEK_END); - unsigned int filelength = ftell(ifile); - fseek(ifile, 0, SEEK_SET); - fread(&nchunks, 1, 1, ifile); - std::vector chunksizes(nchunks,0); - size_t vecbytesize=nchunks*sizeof(size_t); - fread(&chunksizes[0], 1, vecbytesize, ifile); - - if(debug){ - std::cout << "file has "<< (int)nchunks << " chunks"<length << std::endl; - while(chunklength){ - - src = new char[chunksizes.at(chunk)]; - fread(src, 1, chunksizes.at(chunk), ifile); - totalsize += qlz_size_decompressed(src); - - if(debug) - std::cout << writepos << " " << totalsize << ", " << allread<arrbuf[writepos/sizeof(float)]), thisthread->state_decompress); - if(debug) - std::cout << "allread: " << allread << std::endl; - writepos=totalsize; - chunk++; - delete src; - } - if(debug) - std::cout << "allread "<< allread << " totalsize "<< totalsize <thisthread->length throw - - //totalsize compare to vector - - if(allread!=thisthread->length){ - fclose(ifile); - throw std::runtime_error("readArrThread:target array size does not match "); - } - fclose(ifile); - thisthread->done=1;//atomic - - if(thisthread->removewhendone){//thisthread->infile.data() - std::string rmstring="rm -f "; - rmstring+=thisthread->infile; - system(rmstring.data()); - } - return 0; -} - -using namespace boost::python; - - -//module info and interface - -size_t maxreads=1000; -std::vector allreads(maxreads,0); -size_t acounter=0; - -bool readBlocking(long arrpointer, - std::string filenamein, - const boost::python::list shape, - bool rmwhendone){ - - long length=1; - std::vector sshape=toSTLVector(shape); - for(const auto& s:sshape) - length*=s; - - readThread * t=new readThread(arrpointer,filenamein,length,rmwhendone); - t->readBlocking(); - bool succ=t->isDone(); - delete t; - return succ; -} - -int startReading(long arrpointer, - std::string filenamein, - const boost::python::list shape, - bool rmwhendone){ - - long length=1; - std::vector sshape=toSTLVector(shape); - for(const auto& s:sshape) - length*=s; - - readThread * t=new readThread(arrpointer,filenamein,length,rmwhendone); - t->start(); - if(allreads.at(acounter) && !allreads.at(acounter)->isDone()) - throw std::out_of_range("c_readArrThreaded::startReading: overflow. 
Increase number of maximum threads (setMax)"); - allreads.at(acounter)=t; - acounter++; - if(acounter>=maxreads) - acounter=0; - return t->getId(); -} - -bool isDone(int id){ - for(auto& t:allreads){ - if(!t)continue; - if(t->getId()==id){ - if(t->isDone()){ - t->join(); - delete t; - t=0; - return true; - } - else{ - return false; - } - } - } - if(debug) - std::cerr<<"isDone: ID "<< id << " not found "<0xFFFE) - throw std::runtime_error("setMax: must be smaller than 65536"); - maxreads=m; - allreads.resize(m,0); -} - - - -void writeArray(long arrpointer, - std::string file, const boost::python::list shape){ - - long length=1; - std::vector sshape=toSTLVector(shape); - for(const auto& s:sshape) - length*=s; - - length*=sizeof(float); - - FILE *ofile; - char *src=(char*)(void*)arrpointer; - char *dst; - - qlz_state_compress *state_compress = new qlz_state_compress(); - - ofile = fopen(file.data(), "wb"); - - // allocate "uncompressed size" + 400 for the destination buffer - dst = new char [length + 400]; - -if(debug) - std::cout << "array has "<< length << " bytes" < chunksizes; - - while(remaininglength){ - - size_t uselength=0; - if(remaininglength > MAXCHUNK){ - uselength=MAXCHUNK; - remaininglength-=MAXCHUNK; - nchunks++; - if(!nchunks){ - //throw etc - //TBI (only kicks in at about 1TB) - } - - } - else{ - uselength=remaininglength; - remaininglength=0; - } - size_t thissize = qlz_compress(&src[startbyte],&dst[len2], uselength, state_compress); - chunksizes.push_back(thissize); - len2+=thissize; - startbyte+=uselength; - } - if(debug){ - std::cout << "writing "<< len2 << " compressed bytes in "<< (int)nchunks <<" chunks: " < + +namespace p = boost::python; +namespace np = boost::python::numpy; + +using namespace djc; + +using namespace p; +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(simpleArray_float32_set_overloads, simpleArray_float32::set, 2, 6); + + +BOOST_PYTHON_MODULE(c_simpleArray) { + Py_Initialize(); + np::initialize(); + + p::class_("simpleArrayF") + .def("readDtypeFromFile", &simpleArray_float32::readDtypeFromFile) + + .def(self==self) + .def("dtypeI", &simpleArray_float32::dtypeI) + + .def("setName", &simpleArray_float32::setName) + .def("name", &simpleArray_float32::name) + .def("setFeatureNames", &simpleArray_float32::setFeatureNamesPy) + .def("featureNames", &simpleArray_float32::featureNamesPy) + + .def("hasNanOrInf", &simpleArray_float32::hasNanOrInf) + + //explicit overloads necessary + .def("set", &simpleArray_float32::set) + .def("set", &simpleArray_float32::set) + .def("set", &simpleArray_float32::set) + .def("set", &simpleArray_float32::set) + .def("set", &simpleArray_float32::set) + + + .def("readFromFile", &simpleArray_float32::readFromFile) + .def("writeToFile", &simpleArray_float32::writeToFile) + .def("assignFromNumpy", &simpleArray_float32::assignFromNumpy) + .def("transferToNumpy", &simpleArray_float32::transferToNumpy) + .def("createFromNumpy", &simpleArray_float32::createFromNumpy) + .def("copyToNumpy", &simpleArray_float32::copyToNumpy) + .def("isRagged", &simpleArray_float32::isRagged) + .def("split", &simpleArray_float32::split) + .def("getSlice", &simpleArray_float32::getSlice) + .def("append", &simpleArray_float32::append) + .def("cout", &simpleArray_float32::cout) + .def("size", &simpleArray_float32::isize) + .def("shape", &simpleArray_float32::shapePy); + ; + p::class_("simpleArrayI") + .def("readDtypeFromFile", &simpleArray_int32::readDtypeFromFile) + + .def(self==self) + + .def("dtypeI", &simpleArray_int32::dtypeI) + + .def("setName", 
&simpleArray_int32::setName) + .def("name", &simpleArray_int32::name) + .def("setFeatureNames", &simpleArray_int32::setFeatureNamesPy) + .def("featureNames", &simpleArray_int32::featureNamesPy) + + .def("hasNanOrInf", &simpleArray_int32::hasNanOrInf) + + .def("set", &simpleArray_int32::set) + .def("set", &simpleArray_int32::set) + .def("set", &simpleArray_int32::set) + .def("set", &simpleArray_int32::set) + .def("set", &simpleArray_int32::set) + + + .def("readFromFile", &simpleArray_int32::readFromFile) + .def("writeToFile", &simpleArray_int32::writeToFile) + .def("assignFromNumpy", &simpleArray_int32::assignFromNumpy) + .def("transferToNumpy", &simpleArray_int32::transferToNumpy) + .def("createFromNumpy", &simpleArray_int32::createFromNumpy) + .def("copyToNumpy", &simpleArray_int32::copyToNumpy) + .def("isRagged", &simpleArray_int32::isRagged) + .def("split", &simpleArray_int32::split) + .def("getSlice", &simpleArray_int32::getSlice) + .def("append", &simpleArray_int32::append) //just use the explicit one here + .def("cout", &simpleArray_int32::cout) + .def("size", &simpleArray_int32::isize) + .def("shape", &simpleArray_int32::shapePy); + ; + +} + diff --git a/compiled/src/c_storeTensor.C b/compiled/src/c_storeTensor.C deleted file mode 100644 index 4570ccd..0000000 --- a/compiled/src/c_storeTensor.C +++ /dev/null @@ -1,97 +0,0 @@ -//allows functions with 18 or less paramenters -#define BOOST_PYTHON_MAX_ARITY 20 -#include -#include "boost/python/extract.hpp" -#include "boost/python/numeric.hpp" -#include "boost/python/list.hpp" -#include "boost/python/str.hpp" -//#include "boost/filesystem.hpp" -#include -#include -#include "TString.h" -#include -#include -#include "TFile.h" -#include "TTree.h" -//don't use new root6 stuff, has problems with leaf_list -//#include "TTreeReader.h" -//#include "TTreeReaderValue.h" -//#include "TTreeReaderArray.h" -//#include "TTreeReaderUtils.h" -#include -#include -#include "TStopwatch.h" -#include "../interface/indata.h" -#include "../interface/pythonToSTL.h" -#include "../interface/helper.h" -#include -#include -#include -#include - - - - - - - - -/* - * wrapper to create input to C++ only function - * Can be generalised to doing it at the same time for many different sized branches - */ -void store(long numpyarray, const boost::python::list _shape, - std::string filename) { - std::vector shape = toSTLVector(_shape); - int nentries = shape[0]; - - int ndims = shape.size() - 1; - int shape_root[ndims]; - size_t flattened_length = 1; - for(size_t i = 1; i < shape.size(); i++) { - shape_root[i-1] = shape[i]; - flattened_length *= shape[i]; - } - float data_root[flattened_length]; - float* data_in = reinterpret_cast(numpyarray); - - TFile *outfile = new TFile(filename.c_str(), "RECREATE"); - TDirectory *dir = outfile->mkdir("prediction", "prediction"); - dir->cd(); - TTree *t = new TTree("tree", "tree"); - - t->Branch("ndims" ,&ndims, "ndims_/i"); - t->Branch("shape", shape_root, "shape_[ndims_]/i"); - t->Branch("flattened_length", &flattened_length, "flattened_length_/i"); - t->Branch("data", data_root, "data_[flattened_length_]/f"); - - - for(size_t e = 0; e < nentries; e++) { - for (size_t i = 0; i < flattened_length; i++) { - data_root[i]=data_in[e*flattened_length+i]; - } - t->Fill(); - - } - - t->Write(); - outfile->Close(); - delete outfile; - - -} - - - - - - - - -// Expose classes and methods to Python -BOOST_PYTHON_MODULE(c_storeTensor) { - boost::python::numeric::array::set_module_and_type("numpy", "ndarray"); - __hidden::indata();//for some reason 
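// Illustrative sketch, not from the DeepJetCore sources: the "set" bindings
// above note that explicit overloads are necessary. In Boost.Python the usual
// way to bind an overloaded member function is to cast each member-function
// pointer to the exact signature wanted; the class and overloads below are
// invented just to show that pattern, they are not the simpleArray API.
#include <boost/python.hpp>
#include <cstddef>

struct Grid {
    void set(std::size_t i, float v) {}                  // 1D access
    void set(std::size_t i, std::size_t j, float v) {}   // 2D access
};

BOOST_PYTHON_MODULE(c_overloadExample) {
    using namespace boost::python;
    class_<Grid>("Grid")
        .def("set", static_cast<void (Grid::*)(std::size_t, float)>(&Grid::set))
        .def("set", static_cast<void (Grid::*)(std::size_t, std::size_t, float)>(&Grid::set));
}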
exposing the class prevents segfaults. garbage collector? - //anyway, it doesn't hurt, just leave this here - def("store", &store); -} diff --git a/compiled/src/c_testFunctions.C b/compiled/src/c_testFunctions.C new file mode 100644 index 0000000..a9c4c2b --- /dev/null +++ b/compiled/src/c_testFunctions.C @@ -0,0 +1,14 @@ + +#include +#include + +//tests +#include "trainDataFileStreamer.h" + + +using namespace boost::python; + +BOOST_PYTHON_MODULE(c_testFunctions) { + def("testTrainDataFileStreamer", &djc::test::testTrainDataFileStreamer); + +} diff --git a/compiled/src/c_trainData.C b/compiled/src/c_trainData.C new file mode 100644 index 0000000..80d83fc --- /dev/null +++ b/compiled/src/c_trainData.C @@ -0,0 +1,88 @@ +/* + * c_TrainDataInferface.C + * + * Created on: 6 Nov 2019 + * Author: jkiesele + */ + +#include "../interface/trainData.h" + + +namespace p = boost::python; +namespace np = boost::python::numpy; + +using namespace djc; + +BOOST_PYTHON_MODULE(c_trainData) { + Py_Initialize(); + np::initialize(); + using namespace p; + p::class_("trainData") + + .def(self==self) + .def(self!=self) + + //excplicit overloading + .def("storeFeatureArray", &trainData::storeFeatureArray) + .def("storeFeatureArray", &trainData::storeFeatureArray) + + .def("storeTruthArray", &trainData::storeTruthArray) + .def("storeTruthArray", &trainData::storeTruthArray) + + .def("storeWeightArray", &trainData::storeWeightArray) + .def("storeWeightArray", &trainData::storeWeightArray) + + + // .def("featureList", &trainData::featureList) + // .def("truthList", &trainData::truthList) + // .def("weightList", &trainData::weightList) + + .def("nFeatureArrays", &trainData::nFeatureArrays) + .def("nTruthArrays", &trainData::nTruthArrays) + .def("nWeightArrays", &trainData::nWeightArrays) + + .def("truncate", &trainData::truncate) + .def("append", &trainData::append) + .def("split", &trainData::split) + .def("nElements", &trainData::nElements) + .def("readMetaDataFromFile", &trainData::readMetaDataFromFile) + + .def("readFromFile", &trainData::readFromFile) + .def("readFromFileBuffered", &trainData::readFromFileBuffered) + .def("writeToFile", &trainData::writeToFile) + .def("addToFile", &trainData::addToFile) + + + .def("copy", &trainData::copy) + .def("clear", &trainData::clear) + .def("skim", &trainData::skim) + .def("getSlice", &trainData::getSlice) + + .def("getNumpyFeatureShapes", &trainData::getNumpyFeatureShapes) + .def("getNumpyTruthShapes", &trainData::getNumpyTruthShapes) + .def("getNumpyWeightShapes", &trainData::getNumpyWeightShapes) + + .def("getNumpyFeatureDTypes", &trainData::getNumpyFeatureDTypes) + .def("getNumpyTruthDTypes", &trainData::getNumpyTruthDTypes) + .def("getNumpyWeightDTypes", &trainData::getNumpyWeightDTypes) + + .def("getNumpyFeatureArrayNames", &trainData::getNumpyFeatureArrayNames) + .def("getNumpyTruthArrayNames", &trainData::getNumpyTruthArrayNames) + .def("getNumpyWeightArrayNames", &trainData::getNumpyWeightArrayNames) + + .def("getTruthRaggedFlags", &trainData::getTruthRaggedFlags) + .def("transferFeatureListToNumpy", &trainData::transferFeatureListToNumpy) + .def("transferTruthListToNumpy", &trainData::transferTruthListToNumpy) + .def("transferWeightListToNumpy", &trainData::transferWeightListToNumpy) + + + .def("copyFeatureListToNumpy", &trainData::copyFeatureListToNumpy) + .def("copyTruthListToNumpy", &trainData::copyTruthListToNumpy) + .def("copyWeightListToNumpy", &trainData::copyWeightListToNumpy) + +; + ; +} + + + diff --git a/compiled/src/c_trainDataGenerator.C 
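// Illustrative sketch, not part of the patch: how the trainData interface bound
// above can be driven from C++. Method names are taken from the bindings and
// from trainDataFileStreamer.cpp further down; the shapes, values and file name
// are invented, and the copy/move semantics of the store* overloads are not
// shown in this hunk, so treat this only as an orientation aid.
#include "../interface/trainData.h"

void writeTinyTrainData() {
    using namespace djc;

    simpleArray_float32 feat({10, 3});      // 10 events, 3 features each
    feat.setName("features");
    for (size_t e = 0; e < 10; e++)
        for (size_t f = 0; f < 3; f++)
            feat.set(e, f, 0.5f);

    simpleArray_int32 truth({10, 1});
    truth.setName("isSignal");
    for (size_t e = 0; e < 10; e++)
        truth.set(e, 0, 0);

    trainData td;
    td.storeFeatureArray(feat);
    td.storeTruthArray(truth);
    td.writeToFile("tiny.djctd");

    trainData back;
    back.readFromFile("tiny.djctd");        // back.nElements() should now be 10
}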
b/compiled/src/c_trainDataGenerator.C new file mode 100644 index 0000000..5e1ec2f --- /dev/null +++ b/compiled/src/c_trainDataGenerator.C @@ -0,0 +1,59 @@ +/* + * c_dataGenerator.C + * + * Created on: 7 Nov 2019 + * Author: jkiesele + */ + + + +#include +#include "boost/python/numpy.hpp" +#include "boost/python/list.hpp" +#include "boost/python/str.hpp" +#include + + +#include "../interface/trainDataGenerator.h" + +#include + +#include //debug + +namespace p = boost::python; +namespace np = boost::python::numpy; +using namespace djc; + + +BOOST_PYTHON_MODULE(c_trainDataGenerator) { + Py_Initialize(); + np::initialize(); + p::class_("trainDataGenerator") + + .def("setBatchSize", &trainDataGenerator::setBatchSize) + + .def("setFileList", &trainDataGenerator::setFileListPy) + .def("shuffleFileList", &trainDataGenerator::shuffleFileList) + + .def("setBuffer", &trainDataGenerator::setBuffer) + + + .def("setFileTimeout", &trainDataGenerator::setFileTimeout) + .def("setSquaredElementsLimit", &trainDataGenerator::setSquaredElementsLimit) + .def("setSkipTooLargeBatches", &trainDataGenerator::setSkipTooLargeBatches) + + .def("clear", &trainDataGenerator::clear) + .def("getNBatches", &trainDataGenerator::getNBatches) + + .def("lastBatch", &trainDataGenerator::lastBatch) + .def("isEmpty", &trainDataGenerator::isEmpty) + + .def("prepareNextEpoch", &trainDataGenerator::prepareNextEpoch) + .def("getBatch", &trainDataGenerator::getBatch) + + .def("getNTotal", &trainDataGenerator::getNTotal) + + .def_readwrite("debuglevel", &trainDataGenerator::debuglevel); + ; +} + diff --git a/compiled/src/helper.cpp b/compiled/src/helper.cpp index d394096..b17dd64 100644 --- a/compiled/src/helper.cpp +++ b/compiled/src/helper.cpp @@ -7,6 +7,13 @@ #include "../interface/helper.h" +#include + +#include + +namespace p = boost::python; +namespace np = boost::python::numpy; + TString prependXRootD(const TString& path){ @@ -32,3 +39,18 @@ float deltaPhi(const float& a, const float& b){ while (delta < -pi) delta+= 2* pi; return delta; } + + + +void checkTObject(const TObject* o, TString msg){ + TString mesg = msg; + mesg += ": " ; + const char * name = o->GetName(); + mesg += (TString)name; + if(!o || o->IsZombie()){ + throw std::runtime_error(mesg.Data()); + } + + + +} diff --git a/compiled/src/rocCurve.cpp b/compiled/src/rocCurve.cpp index 34aaf77..77c047a 100644 --- a/compiled/src/rocCurve.cpp +++ b/compiled/src/rocCurve.cpp @@ -44,10 +44,10 @@ static std::vector loglist(double first, double last, double size){ } -rocCurve::rocCurve():nbins_(100),linecol_(kBlack),linewidth_(1),linestyle_(1),fullanalysis_(true){ +rocCurve::rocCurve():nbins_(100),linecol_(kBlack),linewidth_(1),linestyle_(1),fullanalysis_(true),yscale_(1.){ nrocsCounter++; } -rocCurve::rocCurve(const TString& name):nbins_(100),linecol_(kBlack),linewidth_(1),linestyle_(1),fullanalysis_(true){ +rocCurve::rocCurve(const TString& name):nbins_(100),linecol_(kBlack),linewidth_(1),linestyle_(1),fullanalysis_(true),yscale_(1.){ name_=name; } rocCurve::rocCurve(const TString& name, const TString& probability, const TString& truth, @@ -75,6 +75,7 @@ rocCurve::~rocCurve(){ //empty for now } + //now done in a simple tree-Draw way - if optmisation needed: switch to putting rocs in a loop (TBI) void rocCurve::process(TChain *c,std::ostream& out){ @@ -123,12 +124,31 @@ void rocCurve::process(TChain *c,std::ostream& out){ TString nrcc=""; nrcc+=nrocsCounter; + //the bins should be log scale towards high probabilities if nbins>200 + //map over modified softsign + + TCanvas 
cv;//just a dummy - probh_=TH1D("prob"+nrcc,"prob"+nrcc,nbins_,0,1); - vetoh_=TH1D("veto"+nrcc,"veto"+nrcc,nbins_,0,1); - invalidate_=TH1D("invalid"+nrcc,"invalid"+nrcc,nbins_,0,1); - invalidate_veto_=TH1D("invalid_veto"+nrcc,"invalid_veto"+nrcc,nbins_,0,1); + if(nbins_<201){ + probh_=TH1D("prob"+nrcc,"prob"+nrcc,nbins_,0,1.+0.00001); + vetoh_=TH1D("veto"+nrcc,"veto"+nrcc,nbins_,0,1.+0.00001); + invalidate_=TH1D("invalid"+nrcc,"invalid"+nrcc,nbins_,0,1.+0.00001); + invalidate_veto_=TH1D("invalid_veto"+nrcc,"invalid_veto"+nrcc,nbins_,0,1.+0.00001); + } + else{ + std::vector binning; + double scaler=20; + for(float i=0.;i<(float)nbins_ + 0.1;i++){ + double x = i / ((float)nbins_-1.)+0.000001; + double bc = scaledSoftsign(x,scaler); + binning.push_back(bc); + } + probh_=TH1D("prob"+nrcc,"prob"+nrcc,nbins_,&binning.at(0)); + vetoh_=TH1D("veto"+nrcc,"veto"+nrcc,nbins_,&binning.at(0)); + invalidate_=TH1D("invalid"+nrcc,"invalid"+nrcc,nbins_,&binning.at(0)); + invalidate_veto_=TH1D("invalid_veto"+nrcc,"invalid_veto"+nrcc,nbins_,&binning.at(0)); + } c->Draw(probstr+">>prob"+nrcc,allcuts);//probcuts); c->Draw(probstr+">>veto"+nrcc,vetostr); @@ -162,7 +182,7 @@ void rocCurve::process(TChain *c,std::ostream& out){ for(size_t i=0;i misidset=loglist(0.001,1,100); + std::vector misidset=loglist(yscale_*0.00001,yscale_*1,yscale_*100); int count=0; double integral=0; for(float eff=0;eff<1;eff+=0.00001){ @@ -208,6 +228,12 @@ void rocCurve::process(TChain *c,std::ostream& out){ } +double rocCurve::scaledSoftsign(double x, double d)const{ + double norm = 1. /(d / (1+ fabs(d))); + double softs = d*x /(1 + fabs(d*x)); + return norm*softs; +} + diff --git a/compiled/src/rocCurveCollection.cpp b/compiled/src/rocCurveCollection.cpp index 678e98e..8d5e546 100644 --- a/compiled/src/rocCurveCollection.cpp +++ b/compiled/src/rocCurveCollection.cpp @@ -30,7 +30,7 @@ void rocCurveCollection::addExtraLegendEntry(const TString& entr){ } void rocCurveCollection::addROC(const TString& name, const TString& probability, const TString& truth, - const TString& vetotruth, const TString& linecol, const TString& cuts,const TString& invalidateif){ + const TString& vetotruth, const TString& linecol, const TString& cuts,const TString& invalidateif, float yscale){ rocCurve rc=rocCurve(name,probability,truth,vetotruth,colorToTColor(linecol),lineToTLineStyle(linecol),cuts,invalidateif); rc.setLineWidth(linewidth_); @@ -38,13 +38,15 @@ void rocCurveCollection::addROC(const TString& name, const TString& probability, lc.ToLower() ; if(lc.Contains("dummy")) rc.setLineWidth(0); + rc.scaleYAxis(yscale); roccurves_.push_back(rc); legentries_.push_back(name); } void rocCurveCollection::printRocs(TChain* c, const TString& outpdf, - const TString&outfile, TCanvas* cv, TFile * f, std::vector* chainvec,double xmin_in){ + const TString&outfile, TCanvas* cv, TFile * f, std::vector* chainvec,const double xmin_in, + TString experimentlabel,TString lumilabel,TString prelimlabel){ gROOT->SetBatch(); @@ -56,6 +58,7 @@ void rocCurveCollection::printRocs(TChain* c, const TString& outpdf, createFile=true; } size_t count=0; + float maxyscale=0.; std::vector probhistos,vetohistos,invalidhistos,invalidvetohistos; for(size_t i=0;iClone(tempname); invalidvetohistos.push_back(hd); + if(rc.getYAxisScaling()>maxyscale) + maxyscale=rc.getYAxisScaling(); } @@ -99,9 +104,9 @@ void rocCurveCollection::printRocs(TChain* c, const TString& outpdf, cv->Draw(); cv->cd(); - TH1D haxis=TH1D("AXIS","AXIS",10,0,1); + TH1D haxis=TH1D("AXIS","AXIS",10,0,1.); //haxis.Draw("AXIS"); 
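// Illustrative sketch, not part of the patch: the non-uniform binning that the
// nbins_ > 200 branch above builds. Bin edges follow a scaled softsign, which
// concentrates bins towards high probability values; the formula and the
// scaler = 20 constant are copied from rocCurve::scaledSoftsign and
// rocCurve::process above, the bin count below is invented.
#include <cmath>
#include <cstdio>
#include <vector>

static double scaledSoftsign(double x, double d) {
    double norm  = 1. / (d / (1. + std::fabs(d)));   // normalises so f(1) == 1
    double softs = d * x / (1. + std::fabs(d * x));
    return norm * softs;
}

int main() {
    const int nbins = 300;
    const double scaler = 20.;
    std::vector<double> edges;
    for (int i = 0; i <= nbins; i++) {               // nbins bins need nbins+1 edges
        double x = double(i) / (nbins - 1.) + 1e-6;
        edges.push_back(scaledSoftsign(x, scaler));
    }
    std::printf("first edge %.4f, last edge %.4f\n", edges.front(), edges.back());
    return 0;
}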
- haxis.GetYaxis()->SetRangeUser(8e-4,1); + haxis.GetYaxis()->SetRangeUser(maxyscale*8e-6,maxyscale); //haxis.GetYaxis()->SetNdivisions(510); haxis.GetYaxis()->SetTitleSize(0.05); @@ -184,35 +189,38 @@ void rocCurveCollection::printRocs(TChain* c, const TString& outpdf, haxis.GetXaxis()->SetRangeUser(xmin,1); if(cmsstyle_){ + if(experimentlabel.Length()<1)experimentlabel="CMS Simulation"; + if(prelimlabel.Length()<1)prelimlabel="#it{Preliminary}"; + if(lumilabel.Length()<1)lumilabel="#sqrt{s}=13 TeV, Phase 1"; + } + + + //add CMS labels + TLatex *tex = new TLatex(0.18,0.855,experimentlabel); + tex->SetNDC(true); + tex->SetTextFont(61); + tex->SetTextSize(0.08); + tex->SetLineWidth(2); + tex->Draw(); + + + tex = new TLatex(0.57,0.865,prelimlabel); + tex->SetNDC(true); + tex->SetTextFont(42); + tex->SetTextSize(0.05); + tex->SetLineWidth(2); + tex->Draw(); + + + tex = new TLatex(.97,0.955,lumilabel); + tex->SetNDC(true); + tex->SetTextAlign(31); + tex->SetTextFont(42); + tex->SetTextSize(0.05); + tex->SetLineWidth(2); + tex->Draw(); - //add CMS labels - TLatex *tex = new TLatex(0.18,0.865,"CMS Simulation"); - tex->SetNDC(true); - tex->SetTextFont(61); - tex->SetTextSize(0.08); - tex->SetLineWidth(2); - tex->Draw(); - - tex = new TLatex(0.57,0.865,"#it{Preliminary}"); - tex->SetNDC(true); - tex->SetTextFont(42); - tex->SetTextSize(0.05); - tex->SetLineWidth(2); - tex->Draw(); - - tex = new TLatex(.97,0.955,"#sqrt{s}=13 TeV, Phase 1"); - tex->SetNDC(true); - tex->SetTextAlign(31); - tex->SetTextFont(42); - tex->SetTextSize(0.05); - tex->SetLineWidth(2); - tex->Draw(); - - //haxis.GetXaxis()->SetTitle("b-jet efficiency"); - - haxis.GetXaxis()->SetRangeUser(0,1); - } leg_->Draw("same"); /////// if(extralegendtries_.size()){ @@ -238,7 +246,7 @@ void rocCurveCollection::printRocs(TChain* c, const TString& outpdf, for(auto& t:additionaltext_) t->Draw(); //comment lines - TLatex *tex = new TLatex(0.18,0.805,comment0_); + tex = new TLatex(0.18,0.805,comment0_); tex->SetNDC(true); tex->SetTextFont(42); tex->SetTextSize(0.05); diff --git a/compiled/src/simpleArray.cpp b/compiled/src/simpleArray.cpp new file mode 100644 index 0000000..73966f1 --- /dev/null +++ b/compiled/src/simpleArray.cpp @@ -0,0 +1,354 @@ +#include "../interface/simpleArray.h" +#include + + +namespace djc{ +/** + * Split indices can directly be used with the split() function. + * Returns e.g. {2,5,3,2}, which corresponds to DataSplitIndices of {2,7,10,12} + */ + +std::vector simpleArrayBase::getSplitIndices(const std::vector & rowsplits, size_t nelements_limit, + bool sqelementslimit, bool strict_limit, std::vector& size_ok, std::vector& nelemtns_per_split){ + return priv_getSplitIndices(false, rowsplits, nelements_limit, sqelementslimit, size_ok, nelemtns_per_split,strict_limit); +} + +/** + * Split indices can directly be used with the split() function. + * Returns row splits e.g. 
{2,7,10,12} which corresponds to Split indices of {2,5,3,2} + */ + + +std::vector simpleArrayBase::getDataSplitIndices(const std::vector & rowsplits, size_t nelements_limit, + bool sqelementslimit, bool strict_limit, std::vector& size_ok, std::vector& nelemtns_per_split){ + return priv_getSplitIndices(true, rowsplits, nelements_limit, sqelementslimit, size_ok, nelemtns_per_split,strict_limit); +} + +/** + * Transforms row splits to n_elements per ragged sample + */ + +std::vector simpleArrayBase::dataSplitToSplitIndices(const std::vector& row_splits){ + if(!row_splits.size()) + throw std::runtime_error("simpleArrayBase::dataSplitToSplitIndices: row splits empty"); + auto out = std::vector(row_splits.size()-1); + for(size_t i=0;i simpleArrayBase::splitToDataSplitIndices(const std::vector& n_elements){ + auto out = std::vector(n_elements.size()+1); + out.at(0)=0; + int64_t last=0; + for(size_t i=0;i shape,const std::vector& rowsplits) { + assigned_=false; + shape_ = shape; + if(rowsplits.size()){ + if(rowsplits.size() != shape_.at(0)+1) + throw std::runtime_error("simpleArrayBase::simpleArrayBase: rowsplits.size() must equal shape[0] + 1"); + + rowsplits_=rowsplits; + shape_ = shapeFromRowsplits(); + } + size_ = sizeFromShape(shape_); +} + + + +std::string simpleArrayBase::dtypeToString(dtypes t){ + if(t==float32) + return "float32"; + else if(t==int32) + return "int32"; + else + return "undef"; +} +simpleArrayBase::dtypes simpleArrayBase::stringToDtype(const std::string& s){ + if(s=="float32") + return float32; + else if(s=="int32") + return int32; + else + throw std::runtime_error("simpleArrayBase::dtypes simpleArrayBase::stringToDtype unknown dtype"); +} + +boost::python::list simpleArrayBase::shapePy()const{ + boost::python::list l; + for(const auto& s: shape_) + l.append(s); + return l; +} + +std::string simpleArrayBase::readDtypeFromFileP(FILE *& ifile)const{ + return dtypeToString(readDtypeTypeFromFileP(ifile)); +} + +std::string simpleArrayBase::readDtypeFromFile(const std::string& f)const{ + return dtypeToString(readDtypeTypeFromFile(f)); +} + +simpleArrayBase::dtypes simpleArrayBase::readDtypeTypeFromFileP(FILE *& ifile)const{ + long pos = ftell(ifile); + + float version = 0; + io::readFromFile(&version, ifile); + if(!checkVersionCompatible(version)){//compat + throw std::runtime_error("simpleArrayBase::readDtypeTypeFromFileP: wrong format version"); + } + dtypes dt=float32; + if(checkVersionStrict(version)) + io::readFromFile(&dt, ifile); + fseek(ifile,pos-ftell(ifile),SEEK_CUR);//go back + return dt; +} + +simpleArrayBase::dtypes simpleArrayBase::readDtypeTypeFromFile(const std::string& f)const{ + FILE *ifile = fopen(f.data(), "rb"); + if(!ifile) + throw std::runtime_error("simpleArrayBase::readDtypeTypeFromFile: file "+f+" could not be opened."); + float version = 0; + io::readFromFile(&version, ifile); + if(!checkVersionCompatible(version)) + throw std::runtime_error("simpleArrayBase::readDtypeTypeFromFile: wrong format version"); + auto type = readDtypeTypeFromFileP(ifile); + fclose(ifile); + return type; +} + + +std::vector simpleArrayBase::readRowSplitsFromFileP(FILE *& ifile, bool seeknext){ + + float version = 0; + size_t size; + std::vector shape; + std::vector rowsplits; + io::readFromFile(&version, ifile); + if(!checkVersionCompatible(version)) + throw std::runtime_error("simpleArrayBase::readRowSplitsFromFileP: wrong format version"); + if(checkVersionStrict(version)){ + dtypes rdtype; + std::string namedummy; + std::vector featnamedummy; + io::readFromFile(&rdtype, 
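// Illustrative sketch, not part of the patch: the two ragged-index
// representations converted by dataSplitToSplitIndices and
// splitToDataSplitIndices above. Per-sample element counts such as {2,5,3,2}
// correspond to cumulative offsets {0,2,7,10,12} (quoted above without the
// leading zero). Helper names below are invented.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<int64_t> rowSplitsToCounts(const std::vector<int64_t>& rs) {
    std::vector<int64_t> out(rs.size() - 1);
    for (size_t i = 0; i < out.size(); i++)
        out[i] = rs[i + 1] - rs[i];
    return out;
}

std::vector<int64_t> countsToRowSplits(const std::vector<int64_t>& counts) {
    std::vector<int64_t> out(counts.size() + 1, 0);
    for (size_t i = 0; i < counts.size(); i++)
        out[i + 1] = out[i] + counts[i];
    return out;
}

int main() {
    std::vector<int64_t> rs = {0, 2, 7, 10, 12};
    assert(rowSplitsToCounts(rs) == (std::vector<int64_t>{2, 5, 3, 2}));
    assert(countsToRowSplits({2, 5, 3, 2}) == rs);
    return 0;
}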
ifile); + io::readFromFile(&namedummy, ifile); + io::readFromFile(&featnamedummy, ifile); + } + + io::readFromFile(&size, ifile); + + size_t shapesize = 0; + io::readFromFile(&shapesize, ifile); + shape = std::vector(shapesize, 0); + io::readFromFile(&shape[0], ifile, shapesize); + + size_t rssize = 0; + io::readFromFile(&rssize, ifile); + rowsplits = std::vector(rssize, 0); + + if(rssize){ + quicklz iqlz; + iqlz.readAll(ifile, &rowsplits[0]); + } + if(seeknext){ + quicklz qlz;//template arg does not matter here + qlz.skipBlock(ifile);//sets file point to next item + } + return rowsplits; +} + + +void simpleArrayBase::skipToNextArray(FILE *& ofile)const{ + readRowSplitsFromFileP(ofile,true); +} + + +std::vector simpleArrayBase::mergeRowSplits(const std::vector & rowsplitsa, const std::vector & rowsplitsb){ + if(rowsplitsb.size()<1) + return rowsplitsa; + if(rowsplitsa.size()<1) + return rowsplitsb; + std::vector out=rowsplitsa; + out.resize(out.size() + rowsplitsb.size()-1); + int64_t lasta = rowsplitsa.at(rowsplitsa.size()-1); + + for(size_t i=0;i simpleArrayBase::splitRowSplits(std::vector & rowsplits, const size_t& splitpoint){ + + if(splitpoint >= rowsplits.size()) + throw std::out_of_range("simpleArrayBase::splitRowSplits: split index out of range"); + + int64_t rsatsplitpoint = rowsplits.at(splitpoint); + std::vector out = std::vector (rowsplits.begin(),rowsplits.begin()+splitpoint+1); + std::vector rhs = std::vector(rowsplits.size()-splitpoint); + for(size_t i=0;i simpleArrayBase::priv_getSplitIndices(bool datasplit, const std::vector & rowsplits, size_t nelements_limit, + bool sqelementslimit, std::vector& size_ok, std::vector& nelemtns_per_split, bool strict_limit){ + + std::vector outIdxs; + size_ok.clear(); + nelemtns_per_split.clear(); + if(rowsplits.size()<1) + return outIdxs; + + size_t i_old=0; + size_t s_old = 0; + size_t i_s = 0; + while (true) { + + size_t s = rowsplits.at(i_s); + size_t delta = s - s_old; + size_t i_splitat = rowsplits.size()+1; + + if (sqelementslimit) + delta *= delta; + + if (delta > nelements_limit && i_s != i_old+1) { + i_splitat = i_s - 1; + i_s--; + } + else if (delta == nelements_limit || + i_s == rowsplits.size() - 1 || + (delta > nelements_limit && i_s == i_old+1)) { + i_splitat = i_s; + } + + + if (i_splitat < rowsplits.size() ) { //split + + if(i_splitat==i_old){ + //sanity check, should not happen + std::cout <<"simpleArrayBase::priv_getSplitIndices: attempting empty split at " << i_splitat << std::endl; + throw std::runtime_error("simpleArrayBase::priv_getSplitIndices: attempting empty split"); + } + + + size_t nelements = rowsplits.at(i_splitat) - rowsplits.at(i_old); + bool is_good = (!strict_limit || nelements <= nelements_limit) && nelements>0;//safety for zero element splits + size_ok.push_back(is_good); + nelemtns_per_split.push_back(nelements); + + if(datasplit) + outIdxs.push_back(i_splitat); + else + outIdxs.push_back(i_splitat - i_old); + + + //std::cout << "i_old " << i_old << "\n"; + //std::cout << "i_s " << i_s << "\n"; + //std::cout << "s_old " << s_old << "\n"; + //std::cout << "s " << s << "\n"; + //std::cout << "i_splitat " << i_splitat << "\n"; + //std::cout << "is_good " << is_good << "\n"; + //std::cout << "i_splitat - i_old " << i_splitat - i_old << "\n"; + //std::cout << std::endl; + + i_old = i_splitat; + s_old = rowsplits.at(i_old); + //i_s = i_splitat; + + } + i_s++; + if(i_s >= rowsplits.size()) + break; + } + + return outIdxs; +} + + +void simpleArrayBase::getFlatSplitPoints(size_t splitindex_begin, size_t 
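// Illustrative sketch, not part of the patch: what mergeRowSplits above does
// when one ragged array is appended to another. The second array's cumulative
// offsets are shifted by the total element count of the first; the numbers in
// main() are invented.
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<int64_t> mergeRowSplits(const std::vector<int64_t>& a,
                                    const std::vector<int64_t>& b) {
    if (b.empty()) return a;
    if (a.empty()) return b;
    std::vector<int64_t> out = a;           // keep {0, ..., lastA}
    const int64_t lastA = a.back();
    for (size_t i = 1; i < b.size(); i++)   // skip b's leading zero
        out.push_back(b[i] + lastA);
    return out;
}

int main() {
    // first array: samples of 2 and 3 elements; second: samples of 1 and 4 elements
    auto merged = mergeRowSplits({0, 2, 5}, {0, 1, 5});
    for (int64_t v : merged) std::printf("%lld ", (long long)v);   // 0 2 5 6 10
    std::printf("\n");
    return 0;
}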
splitindex_end, + size_t & splitpoint_start, size_t & splitpoint_end)const{ + splitpoint_start = splitindex_begin; + splitpoint_end = splitindex_end; + if(isRagged()){ + splitpoint_start = rowsplits_.at(splitindex_begin); + splitpoint_end = rowsplits_.at(splitindex_end); + for (size_t i = 2; i < shape_.size(); i++){ + splitpoint_start *= (size_t)std::abs(shape_.at(i)); + splitpoint_end *= (size_t)std::abs(shape_.at(i)); + } + } + else{ + for (size_t i = 1; i < shape_.size(); i++){ + splitpoint_start *= (size_t)std::abs(shape_.at(i)); + splitpoint_end *= (size_t)std::abs(shape_.at(i)); + } + } +} + + + +size_t simpleArrayBase::sizeFromShape(const std::vector& shape) const { + int64_t size = 1; + for (const auto s : shape){ + size *= std::abs(s); + if(s<0) + size=std::abs(s);//first ragged dimension has the full size of previous dimensions + } + return size; +} + + +std::vector simpleArrayBase::shapeFromRowsplits()const{ + if(!isRagged()) return shape_; + if(shape_.size()<2) return shape_; + auto outshape = shape_; + //rowsplits.size = nbatch+1 + outshape.at(1) = - (int)rowsplits_.at(rowsplits_.size()-1); + return outshape; +} + + +void simpleArrayBase::checkShape(size_t ndims)const{ + //rowsplit ready due to definiton of shape + if(ndims != shape_.size()){ + throw std::out_of_range("simpleArrayBase::checkShape: shape does not match dimensions accessed ("+ + std::to_string(ndims)+"/"+std::to_string(shape_.size())+") "+name_); + } +} + + +void simpleArrayBase::checkSize(size_t idx)const{ + if(idx >= size_) + throw std::out_of_range("simpleArrayBase::checkSize: index out of range"); +} + + +void simpleArrayBase::checkRaggedIndex(size_t i, size_t j)const{ + if(i > rowsplits_.size()-1 || j >= rowsplits_.at(i+1)-rowsplits_.at(i)) + throw std::out_of_range("simpleArrayBase::checkRaggedIndex: index out of range"); +} + + +template<> +simpleArrayBase::dtypes simpleArray_float32::dtype()const{return float32;} +template<> +simpleArrayBase::dtypes simpleArray_int32::dtype()const{return int32;} + +}//ns diff --git a/compiled/src/simpleArrayFiller.cpp b/compiled/src/simpleArrayFiller.cpp new file mode 100644 index 0000000..9a99efd --- /dev/null +++ b/compiled/src/simpleArrayFiller.cpp @@ -0,0 +1,94 @@ + +#include "../interface/simpleArrayFiller.h" + +namespace djc{ + + +simpleArrayFiller::simpleArrayFiller( + const std::string name, + const std::vector& shape, + simpleArrayBase::dtypes type, + dataUsage dusage, + bool isragged, + const std::vector& featurenames){ + + dusage_=dusage; + isragged_=isragged; + + if(type==simpleArrayBase::float32) + prototype_= new simpleArray_float32(shape); + else if(type==simpleArrayBase::int32) + prototype_= new simpleArray_int32(shape); + else + throw std::invalid_argument("simpleArrayStreamer::init: unsupported dtype"); + + prototype_->setFeatureNames(featurenames); + prototype_->setName(name); + + current_=0; + if(isragged_) + rowsplits_.push_back(0); + newCurrentArray(); + +} + +void simpleArrayFiller::clear(){ + clearData(); + if(prototype_) + delete prototype_; + prototype_=0; +} + +void simpleArrayFiller::clearData(){//keep prototype + for(auto& a:arrays_) + delete a; + arrays_.clear(); + rowsplits_.clear(); + if(isragged_) + rowsplits_.push_back(0); + if(current_) + delete current_; + current_=0; + newCurrentArray(); +} + + +void simpleArrayFiller::fillEvent(){ + rowsplits_.push_back(arrays_.size()); +} + + +simpleArrayBase * simpleArrayFiller::copyToFullArray()const{ + if(prototype_->dtype() == simpleArrayBase::float32){ + return priv_copyToFullArray(); + } + 
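// Illustrative sketch, not part of the patch: the shape convention implemented
// by sizeFromShape and shapeFromRowsplits above. A ragged dimension is stored
// as a negative entry holding the total element count, and the flat buffer size
// is that count times the trailing fixed dimensions. The example shape is invented.
#include <cstdio>
#include <cstdlib>
#include <vector>

size_t sizeFromShape(const std::vector<int>& shape) {
    size_t size = 1;
    for (int s : shape) {
        size *= std::abs(s);
        if (s < 0)          // ragged dim already counts everything before it
            size = std::abs(s);
    }
    return size;
}

int main() {
    // 100 events, 250 ragged entries in total, 3 features per entry
    std::vector<int> shape = {100, -250, 3};
    std::printf("flat size = %zu floats\n", sizeFromShape(shape));   // 750
    return 0;
}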
else if(prototype_->dtype() == simpleArrayBase::int32){ + return priv_copyToFullArray(); + } + else + throw std::runtime_error("simpleArrayStreamer::copyToFullArray: unrecognised type"); + return 0; +} + +size_t simpleArrayFiller::memSizeKB()const{ + size_t vsize = arrays_.size(); + size_t pts = prototype_->size(); + size_t datasize = vsize*pts + pts; + + datasize *= sizeof(float); //int32 and float32 both 4 bytes + + size_t rssize = rowsplits_.size() * sizeof(int64_t); + + return (datasize + rssize)/1024; +} + +void simpleArrayFiller::newCurrentArray(){ + if(prototype_->dtype() == simpleArrayBase::float32) + current_ = new simpleArray_float32((*(simpleArray_float32*)prototype_)); + else if(prototype_->dtype() == simpleArrayBase::int32) + current_ = new simpleArray_int32((*(simpleArray_int32*)prototype_)); +} + +}//djc + + diff --git a/compiled/src/trainData.cpp b/compiled/src/trainData.cpp new file mode 100644 index 0000000..33bef3b --- /dev/null +++ b/compiled/src/trainData.cpp @@ -0,0 +1,624 @@ +#include "../interface/trainData.h" + + +namespace djc{ + +/* + * + std::vector farrs_; + std::vector iarrs_; + + enum typesorting{isfloat,isint}; + std::vector > sorting_; + */ + +void typeContainer::push_back(simpleArrayBase& a){ + if(a.dtype() == simpleArrayBase::float32){ + farrs_.push_back(dynamic_cast(a)); + sorting_.push_back({isfloat,farrs_.size()-1}); + } + else {//if(a.dtype() == simpleArrayBase::int32){ + iarrs_.push_back(dynamic_cast(a)); + sorting_.push_back({isint,iarrs_.size()-1}); + } +} +void typeContainer::move_back(simpleArrayBase& a){ + if(a.dtype() == simpleArrayBase::float32){ + farrs_.push_back(std::move(dynamic_cast(a))); + sorting_.push_back({isfloat,farrs_.size()-1}); + } + else {//if(a.dtype() == simpleArrayBase::int32){ + iarrs_.push_back(std::move(dynamic_cast(a))); + sorting_.push_back({isint,iarrs_.size()-1}); + } +} +bool typeContainer::operator==(const typeContainer& rhs)const{ + if(size() != rhs.size()) + return false; + if(farrs_.size() != rhs.farrs_.size()) + return false; + + if(sorting_ != rhs.sorting_) + return false; + + for(size_t i=0;i=sorting_.size()) + throw std::out_of_range("typeContainer::at: requested "+std::to_string(idx)+" of "+std::to_string(sorting_.size())); + auto s = sorting_.at(idx); + if(s.first == isfloat) + return farrs_.at(s.second); + else //if(s.first == isint) + return iarrs_.at(s.second); + +} +const simpleArrayBase& typeContainer::at(size_t idx)const{ + if(idx>=sorting_.size()) + throw std::out_of_range("typeContainer::at: requested "+std::to_string(idx)+" of "+std::to_string(sorting_.size())); + auto s = sorting_.at(idx); + if(s.first == isfloat) + return farrs_.at(s.second); + else //if(s.first == isint) + return iarrs_.at(s.second); +} + + +simpleArray_float32& typeContainer::at_asfloat32(size_t idx){ + if(at(idx).dtype() != simpleArrayBase::float32) + throw std::runtime_error("typeContainer::at_asfloat32: is not float32"); + return dynamic_cast(at(idx)); +} + +const simpleArray_float32& typeContainer::at_asfloat32(size_t idx)const{ + if(at(idx).dtype() != simpleArrayBase::float32) + throw std::runtime_error("typeContainer::at_asfloat32: is not float32"); + return dynamic_cast(at(idx)); +} + +simpleArray_int32& typeContainer::at_asint32(size_t idx){ + if(at(idx).dtype() != simpleArrayBase::int32) + throw std::runtime_error("typeContainer::at_asfloat32: is not float32"); + return dynamic_cast(at(idx)); +} + +const simpleArray_int32& typeContainer::at_asint32(size_t idx)const{ + if(at(idx).dtype() != simpleArrayBase::int32) + 
throw std::runtime_error("typeContainer::at_asfloat32: is not float32"); + return dynamic_cast(at(idx)); +} + + +void typeContainer::clear(){ + farrs_.clear(); + iarrs_.clear(); + sorting_.clear(); +} + +void typeContainer::writeToFile(FILE *& ofile) const{ + size_t isize=size(); + io::writeToFile(&isize,ofile); + for(const auto& i: sorting_){ + if(i.first == isfloat){ + farrs_.at(i.second).addToFileP(ofile); + } + else {// if(i.first == isint){ + iarrs_.at(i.second).addToFileP(ofile); + } + } +} + +void typeContainer::readFromFile_priv(FILE *& ifile, bool justmetadata){ + clear(); + size_t isize = 0; + io::readFromFile(&isize,ifile); + for(size_t i=0;i::append: format not compatible."); + } + for(size_t i=0;i > vv = { + {&feature_arrays_, &out.feature_arrays_}, + {&truth_arrays_, &out.truth_arrays_}, + {&weight_arrays_, &out.weight_arrays_}}; + for(auto& a: vv){ + for (size_t i=0;isize();i++){ + if(a.first->dtype(i) == simpleArrayBase::float32){ + auto split = a.first->at_asfloat32(i).split(splitindex); + a.second->push_back(split); + } + else if(a.first->dtype(i) == simpleArrayBase::int32){ + auto split = a.first->at_asint32(i).split(splitindex); + a.second->push_back(split); + } + else{ + throw std::runtime_error("trainData::split: do not understand dtype"); + } + } + } + + updateShapes(); + out.updateShapes(); + return out; +} + +trainData trainData::getSlice(size_t splitindex_begin, size_t splitindex_end)const{ + trainData out; + + + std::vector > vv = { + {&feature_arrays_, &out.feature_arrays_}, + {&truth_arrays_, &out.truth_arrays_}, + {&weight_arrays_, &out.weight_arrays_}}; + for(auto& a: vv){ + for (size_t i=0;isize();i++){ + if(a.first->dtype(i) == simpleArrayBase::float32){ + auto split = a.first->at_asfloat32(i).getSlice(splitindex_begin,splitindex_end); + a.second->push_back(split); + } + else if(a.first->dtype(i) == simpleArrayBase::int32){ + auto split = a.first->at_asint32(i).getSlice(splitindex_begin,splitindex_end); + a.second->push_back(split); + } + } + } + + out.updateShapes(); + return out; +} + +trainData trainData::shuffle(const std::vector& shuffle_idxs)const{ + trainData out; + std::vector > vv = { + {&feature_arrays_, &out.feature_arrays_}, + {&truth_arrays_, &out.truth_arrays_}, + {&weight_arrays_, &out.weight_arrays_}}; + for(auto& a: vv){ + for (size_t i=0;isize();i++){ + if(a.first->dtype(i) == simpleArrayBase::float32){ + auto split = a.first->at_asfloat32(i).shuffle(shuffle_idxs); + a.second->push_back(split); + } + else if(a.first->dtype(i) == simpleArrayBase::int32){ + auto split = a.first->at_asint32(i).shuffle(shuffle_idxs); + a.second->push_back(split); + } + else{ + throw std::runtime_error("trainData::shuffle: do not understnad dtype"); + } + } + } + + out.updateShapes(); + return out; + +} + +bool trainData::validSlice(size_t splitindex_begin, size_t splitindex_end)const{ + + const std::vector vv = {&feature_arrays_, &truth_arrays_, &weight_arrays_}; + for(const auto& a: vv) + for (size_t i=0;isize();i++) + if(! 
a->at(i).validSlice(splitindex_begin,splitindex_end)) + return false; + + return true; +} + +void trainData::writeToFile(std::string filename)const{ + + FILE *ofile = fopen(filename.data(), "wb"); + addToFileP(ofile); + fclose(ofile); + +} + +void trainData::addToFile(std::string filename)const{ + + FILE *ofile = fopen(filename.data(), "ab"); + addToFileP(ofile); + fclose(ofile); +} + +void trainData::addToFileP(FILE *& ofile)const{ + float version = DJCDATAVERSION; + io::writeToFile(&version, ofile); + + //shape infos only + writeNested(getShapes(feature_arrays_), ofile); + writeNested(getShapes(truth_arrays_), ofile); + writeNested(getShapes(weight_arrays_), ofile); + + //data + feature_arrays_.writeToFile(ofile); + truth_arrays_.writeToFile(ofile); + weight_arrays_.writeToFile(ofile); +} + +void trainData::priv_readFromFile(std::string filename, bool memcp){ + clear(); + FILE *ifile = fopen(filename.data(), "rb"); + char *buf = 0; + if(false && memcp){ + FILE *diskfile = ifile; + //check if exists before trying to memcp. + checkFile(ifile, filename); //not set at start but won't be used + + fseek(diskfile, 0, SEEK_END); + size_t fsize = ftell(diskfile); + fseek(diskfile, 0, SEEK_SET); /* same as rewind(f); */ + + buf = new char[fsize]; + int ret = fread(buf, 1, fsize, diskfile); + if(!ret){ + delete buf; + throw std::runtime_error("trainData::readFromFile: could not read file in memcp mode"); + } + fclose(diskfile); + + ifile = fmemopen(buf,fsize,"r"); + } + + priv_readSelfFromFileP(ifile,filename); + //check for eof and add until done. the append step can be heavily optimized! FIXME + //read one more byte + int ch = getc(ifile); + while(! feof(ifile)){ + fseek(ifile,-1,SEEK_CUR); + append(priv_readFromFileP(ifile,filename)); + ch = getc(ifile); + } + + fclose(ifile); + if(buf){ + delete buf; + } +} + +trainData trainData::priv_readFromFileP(FILE *& ifile, const std::string& filename)const{ + //include file version check + trainData out; + out.checkFile(ifile, filename); + out.readNested(out.feature_shapes_, ifile); + out.readNested(out.truth_shapes_, ifile); + out.readNested(out.weight_shapes_, ifile); + + out.feature_arrays_ .readFromFile(ifile); + out.truth_arrays_.readFromFile(ifile); + out.weight_arrays_.readFromFile(ifile); + return out; +} + +void trainData::priv_readSelfFromFileP(FILE *& ifile, const std::string& filename){ + checkFile(ifile, filename); + readNested(feature_shapes_, ifile); + readNested(truth_shapes_, ifile); + readNested(weight_shapes_, ifile); + + feature_arrays_ .readFromFile(ifile); + truth_arrays_.readFromFile(ifile); + weight_arrays_.readFromFile(ifile); +} + +void trainData::readMetaDataFromFile(const std::string& filename){ + + FILE *ifile = fopen(filename.data(), "rb"); + checkFile(ifile,filename); + + readNested(feature_shapes_, ifile); + readNested(truth_shapes_, ifile); + readNested(weight_shapes_, ifile); + + //read dtypes + + feature_arrays_ .readMetaDataFromFile(ifile); + truth_arrays_.readMetaDataFromFile(ifile); + weight_arrays_.readMetaDataFromFile(ifile); + + fclose(ifile); + +} + +std::vector trainData::getFirstRowsplits()const{ + + const std::vector vv = {&feature_arrays_, &truth_arrays_, &weight_arrays_}; + for(const auto& a: vv) + for (size_t i=0;isize();i++) + if(a->at(i).rowsplits().size()) + return a->at(i).rowsplits(); + + return std::vector(); +} + +std::vector trainData::readShapesAndRowSplitsFromFile(const std::string& filename, bool checkConsistency){ + std::vector rowsplits; + + FILE *ifile = fopen(filename.data(), "rb"); + 
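// Illustrative sketch, not part of the patch: the memory-buffered read that
// priv_readFromFile above sets up (the branch is currently guarded off with
// "false &&" in this revision). The whole file is read into RAM and wrapped in
// a FILE* via fmemopen (POSIX/glibc), so the existing fread-based parsing can
// run unchanged against memory. Error handling is kept minimal here.
#include <cstdio>
#include <vector>

FILE* openBuffered(const char* filename, std::vector<char>& buf) {
    FILE* disk = std::fopen(filename, "rb");
    if (!disk) return nullptr;
    std::fseek(disk, 0, SEEK_END);
    long fsize = std::ftell(disk);
    std::fseek(disk, 0, SEEK_SET);
    buf.resize(fsize);
    size_t nread = std::fread(buf.data(), 1, fsize, disk);
    std::fclose(disk);
    if (nread != (size_t)fsize) return nullptr;
    return fmemopen(buf.data(), fsize, "r");   // subsequent reads hit memory, not disk
}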
checkFile(ifile,filename); + + //shapes + std::vector > dummy; + readNested(feature_shapes_, ifile); + readNested(truth_shapes_, ifile); + readNested(weight_shapes_, ifile); + + //features + readRowSplitArray(ifile,rowsplits,checkConsistency); + if(!checkConsistency && rowsplits.size()){ + fclose(ifile); + return rowsplits; + } + //truth + readRowSplitArray(ifile,rowsplits,checkConsistency); + if(!checkConsistency && rowsplits.size()){ + fclose(ifile); + return rowsplits; + } + //weights + readRowSplitArray(ifile,rowsplits,checkConsistency); + + fclose(ifile); + return rowsplits; + +} + +void trainData::clear() { + feature_arrays_.clear(); + truth_arrays_.clear(); + weight_arrays_.clear(); + updateShapes(); +} + +void trainData::checkFile(FILE *& ifile, const std::string& filename)const{ + if(!ifile) + throw std::runtime_error("trainData::readFromFile: file "+filename+" could not be opened."); + float version = 0; + io::readFromFile(&version, ifile); + if(!checkVersionCompatible(version)) + throw std::runtime_error("trainData::readFromFile: wrong format version"); + +} + +void trainData::readRowSplitArray(FILE *& ifile, std::vector &rowsplits, bool check)const{ + size_t size = 0; + io::readFromFile(&size, ifile); + for(size_t i=0;i > trainData::getShapes(const typeContainer& a)const{ + std::vector > out; + for(size_t i=0;i nElements()) + throw std::out_of_range("trainData::skim: batch element out of range"); + *this = getSlice(batchelement,batchelement+1); +} + + +boost::python::list trainData::transferNamesToPyList(const typeContainer& tc)const{ + boost::python::list out; + for(size_t i=0;i >& vs)const{ + boost::python::list out; + for(const auto& a: vs){ + boost::python::list nlist; + bool wasragged=false; + for(size_t i=1;i(arrt[1]); + out.append(rs.reshape(p::make_tuple(-1,1)));//row splits + } + else + out.append(a.transferToNumpy(false)[0]); + } + return out; +} + + +boost::python::list trainData::transferFeatureListToNumpy(bool padrowsplits){ + return transferToNumpyList(feature_arrays_,padrowsplits); +} + +boost::python::list trainData::transferTruthListToNumpy(bool padrowsplits){ + return transferToNumpyList(truth_arrays_,padrowsplits); +} + +boost::python::list trainData::transferWeightListToNumpy(bool padrowsplits){ + return transferToNumpyList(weight_arrays_,padrowsplits); +} + +}//ns diff --git a/compiled/src/trainDataFileStreamer.cpp b/compiled/src/trainDataFileStreamer.cpp new file mode 100644 index 0000000..f13ee02 --- /dev/null +++ b/compiled/src/trainDataFileStreamer.cpp @@ -0,0 +1,196 @@ + +#include "../interface/trainDataFileStreamer.h" +#include "../interface/trainData.h" + +namespace djc{ + +trainDataFileStreamer::trainDataFileStreamer( + const std::string & filename, + float bufferInMB):filename_(filename),buffermb_(bufferInMB){ + //create the file + FILE *ofile = fopen(filename.data(), "wb"); + fclose(ofile); + + activestreamers_=&arraystreamers_a_; //not threaded yet + writingstreamers_=&arraystreamers_b_; +} + + +void trainDataFileStreamer::writeBuffer(bool sync){//sync has no effect yet + + auto writestreamers = activestreamers_;//not threaded yet + + trainData td; + for(auto& a: *writestreamers){ + auto acp = a->copyToFullArray(); + if(a->dusage_ == simpleArrayFiller::feature_data) + td.storeFeatureArray(*acp); + else if(a->dusage_ == simpleArrayFiller::truth_data) + td.storeTruthArray(*acp); + else if(a->dusage_ == simpleArrayFiller::feature_data) + td.storeWeightArray(*acp); + + //clean up + a->clearData(); + } + + td.addToFile(filename_); + +} + +bool 
trainDataFileStreamer::bufferFull(){ + size_t totalsizekb=0; + for(auto& a: *activestreamers_) + totalsizekb += a->memSizeKB(); + if(((float)totalsizekb)/1024. >= buffermb_) + return true; + return false; +} + +namespace test{ + +#include "helper.h" + +std::vector createRandomVector(size_t i){ + return GenerateRandomVector(i); +} + +std::vector > createRandomVector(size_t i,size_t j){ + std::vector > out(i); + for(size_t ii=0;ii(j); + } + return out; +} + + +void testTrainDataFileStreamer(){ + + std::string testfilename = "test_testTrainDataFileStreamer_outfile.djctd"; + + simpleArray_float32 reference_myfeatures_all; + reference_myfeatures_all.setFeatureNames({"jetpt","jeteta","jetphi"}); + reference_myfeatures_all.setName("myfeatures"); + + simpleArray_float32 reference_myzeropadded_lepton_features_all; + reference_myzeropadded_lepton_features_all.setFeatureNames({"pt","eta","phi"}); + reference_myzeropadded_lepton_features_all.setName("myzeropadded_lepton_features"); + simpleArray_int32 reference_isSignal_all; + reference_isSignal_all.setName("isSignal"); + + { //file streamer scope + trainDataFileStreamer fs(testfilename,0.07);//small buffer for testing + + simpleArrayFiller* features = fs.add("myfeatures", // just a name, can also be left blank + {3}, // the shape, here just 3 features + simpleArrayBase::float32, // the data type + simpleArrayFiller::feature_data, // what it's used for + true, // data is ragged (variable 1st dimension) + {"jetpt","jeteta","jetphi"}); // optional feature names + + + simpleArrayFiller* zeropadded = fs.add("myzeropadded_lepton_features",// just a name, can also be left blank + {5,3}, // 3 features each for the first 5 leptons + simpleArrayBase::float32, // the data type + simpleArrayFiller::feature_data, // what it's used for + false, // data is not ragged + {"pt","eta","phi"}); // optional feature names + + //add a non ragged per-event variable + simpleArrayFiller* truth = fs.add("isSignal",{1},simpleArrayBase::int32,simpleArrayFiller::truth_data, false); + + + + for(int i=0;i<3000;i++){ + int nfirst = i+1; + while(nfirst>30){ + nfirst-=30; + } + auto jetprop = createRandomVector(nfirst,3); + + std::vector jetrs={0,nfirst}; + + simpleArray_float32 reference_jetarr({1,-1,3},jetrs); + reference_jetarr.setName("jetarr"); + + for(size_t i=0;iarr().set(0, jetprop[i][0]); + features->arr().set(1, jetprop[i][1]); + features->arr().set(2, jetprop[i][2]); + features->fill(); + + reference_jetarr.set(0,i,0, jetprop[i][0]); + reference_jetarr.set(0,i,1, jetprop[i][1]); + reference_jetarr.set(0,i,2, jetprop[i][2]); + } + + //this should not be done excessively in a real setting, needs mem copy every call! 
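// Illustrative sketch, not part of the patch: the fill pattern exercised by
// testTrainDataFileStreamer below, condensed. Calls follow the test code
// (fs.add(...), arr().set(...), fill(), fillEvent()); any template qualifiers
// on arr() may have been lost in this rendering, and the branch layout, sizes
// and file name here are invented. The streamer flushes its buffer when it
// goes out of scope.
#include "../interface/trainDataFileStreamer.h"

void writeExampleFile() {
    using namespace djc;
    trainDataFileStreamer fs("example.djctd", /*bufferInMB=*/50.);

    auto* jets  = fs.add("jets", {3}, simpleArrayBase::float32,
                         simpleArrayFiller::feature_data, /*ragged=*/true,
                         {"pt", "eta", "phi"});
    auto* truth = fs.add("isSignal", {1}, simpleArrayBase::int32,
                         simpleArrayFiller::truth_data, /*ragged=*/false);

    for (int event = 0; event < 100; event++) {
        for (int j = 0; j < 4; j++) {     // ragged: any number of jets per event
            jets->arr().set(0, 10.f * j);
            jets->arr().set(1, 0.f);
            jets->arr().set(2, 0.f);
            jets->fill();                 // one entry of the ragged dimension
        }
        truth->arr().set(0, event % 2);
        truth->fill();
        fs.fillEvent();                   // closes the event / advances the row split
    }
}   // fs goes out of scope here and writes the remaining buffer to example.djctd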
+ reference_myfeatures_all.append(reference_jetarr); + + auto lepprop = createRandomVector(5,3); + zeropadded->arr().fillZeros(); //make sure everything is initialized with zeros + + simpleArray_float32 reference_leparr({1,5,3}); + reference_leparr.setName("leparr"); + reference_leparr.fillZeros(); + + for(size_t i=0;iarr().set(i,0,lepprop.at(i)[0]); + zeropadded->arr().set(i,1,lepprop.at(i)[1]); + zeropadded->arr().set(i,2,lepprop.at(i)[2]); + + reference_leparr.set(0,i,0,lepprop.at(i)[0]); + reference_leparr.set(0,i,1,lepprop.at(i)[1]); + reference_leparr.set(0,i,2,lepprop.at(i)[2]); + if(i>3) + break; + } + zeropadded->fill(); + + reference_myzeropadded_lepton_features_all.append(reference_leparr); + + int issignal = i%2; + truth->arr().set(0, issignal); + truth->fill(); + + simpleArray_int32 reference_issig({1,1}); + reference_issig.setName("issig"); + reference_issig.set(0,0,issignal); + reference_isSignal_all.append(reference_issig); + + fs.fillEvent(); + + + } + }//file streamer scope, auto save + + //read back and check if same + trainData td; + td.readFromFile(testfilename); + td.nFeatureArrays(); + + //a bit quick and dirty casts. We know the type (otherwise would need type check first) + auto reopened_myfeatures = dynamic_cast(td.featureArray(0)); + auto reopened_lepton_features = dynamic_cast(td.featureArray(1)); + auto reopened_issignal = dynamic_cast(td.truthArray(0)); + + if(reopened_myfeatures != reference_myfeatures_all){ + // tdfeat0.cout();myfeatures_all.cout(); + throw std::logic_error("testTrainDataFileStreamer: simpleArray_float32 ragged inconsistent"); + } + if(reopened_lepton_features != reference_myzeropadded_lepton_features_all){ + // tdfeat1.cout(); + throw std::logic_error("testTrainDataFileStreamer: simpleArray_float32 not ragged inconsistent"); + } + if(reopened_issignal != reference_isSignal_all){ + // tdtruth.cout(); + throw std::logic_error("testTrainDataFileStreamer: simpleArray_float32 ragged inconsistent"); + } + //avoid warning + int res = system(("rm -f "+testfilename).data()); + +} + +}//test + +}//djc diff --git a/compiled/src/trainDataGenerator.cpp b/compiled/src/trainDataGenerator.cpp new file mode 100644 index 0000000..f591ec3 --- /dev/null +++ b/compiled/src/trainDataGenerator.cpp @@ -0,0 +1,420 @@ +#include "../interface/trainDataGenerator.h" + +namespace djc{ + + +trainDataGenerator::trainDataGenerator() :debuglevel(0), + randomcount_(1), batchsize_(2),sqelementslimit_(false),skiplargebatches_(true), readthread_(0), nextreadIdx_(0), filecount_(0), nbatches_( + 0), npossiblebatches_(0), ntotal_(0), nsamplesprocessed_(0),lastbatchsize_(0),filetimeout_(10), + batchcount_(0),lastbuffersplit_(0){ +} + +trainDataGenerator::~trainDataGenerator(){ + if(readthread_){ + readthread_->join(); + delete readthread_; + } + +} + +void trainDataGenerator::setFileListPy(boost::python::list files){ + trainDataGenerator::setFileList(toSTLVector(files)); +} + +void trainDataGenerator::shuffleFileList(){ + std::random_device rd; + std::mt19937 g(rd()); + g.seed(randomcount_); + randomcount_++; + std::shuffle(std::begin(shuffle_indices_),std::end(shuffle_indices_),g); + + for(const auto i:shuffle_indices_) + std::shuffle(std::begin(sub_shuffle_indices_.at(i)), + std::end(sub_shuffle_indices_.at(i)),g); + + //redo splits etc + prepareSplitting(); + batchcount_=0; + lastbuffersplit_=0; +} + +void trainDataGenerator::setBuffer(const trainData& td){ + + clear(); + if(td.featureShapes().size()<1 || td.featureShapes().at(0).size()<1) + throw 
std::runtime_error("trainDataGenerator::setBuffer: no features filled in trainData object"); + auto hasRagged = tdHasRaggedDimension(td); + + auto rs = td.getFirstRowsplits(); + if(rs.size()) + orig_rowsplits_.push_back(rs); + shuffle_indices_.push_back(0); + std::vector vec; + for(size_t i=0;i0) + std::cout << "reading file " << nextread_ << std::endl; + //use mem buffered read, read whole file in one go and then decompress etc from memory + buffer_read.readFromFileBuffered(nextread_); + if(debuglevel>0) + std::cout << "reading file " << nextread_ << " done"<< std::endl; + buffer_read = buffer_read.shuffle(sub_shuffle_indices_.at(nextreadIdx_)); + return; + } + catch(std::exception & e){ //if there are data glitches we don't want the whole training fail immediately + caught=e; + std::cout << "File not "<< nextread_ <<" successfully read: " << e.what() << std::endl; + std::cout << "trying " << filetimeout_-ntries << " more time(s)" << std::endl; + ntries+=1; + } + } + sleep(1); + ntries++; + } + buffer_read.clear(); + throw std::runtime_error("trainDataGenerator::readBuffer: file "+nextread_+ " could not be read."); +} + +void trainDataGenerator::readInfo(){ + ntotal_=0; + bool hasRagged=false; + bool firstfile=true; + + shuffle_indices_.resize(orig_infiles_.size()); + for(size_t i=0;i::readNTotal: no features filled in trainData object "+f); + + //create sub_shuffle_idxs + std::vector vec; + for(size_t i=0;i rowsplits = td.readShapesAndRowSplitsFromFile(f, firstfile);//check consistency only for first + if(debuglevel>1) + std::cout << "rowsplits.size() " <0) + std::cout << "trainDataGenerator::readInfo: total elements "<< ntotal_ < trainDataGenerator::subShuffleRowSplits(const std::vector& thisrs, + const std::vector& s_idx)const{ + + auto nelems = simpleArrayBase::dataSplitToSplitIndices(thisrs); + auto snelems=nelems; + //shuffle + for(size_t si=0;si1){ + std::cout << "trainDataGenerator::prepareSplitting: splits" < allrs; + for(size_t i=0;i1){ + std::cout << "all (first 100) row splits " << allrs.size() << std::endl; + int counter =0; + for(const auto& s: allrs){ + std::cout << s << ", " ; + if(counter>100)break; + counter++; + } + std::cout << std::endl; + } + std::vector nelems_per_split; + splits_ = simpleArrayBase::getSplitIndices(allrs, batchsize_,sqelementslimit_ , skiplargebatches_, usebatch_, nelems_per_split); + + nbatches_=0; + npossiblebatches_=0; + for(size_t i=0;i1){ + size_t nprint = splits_.size(); + if(nprint>200)nprint=200; + for(size_t i=0;i< nprint;i++){ + std::cout << i ; + if(usebatch_.at(i)) + std::cout << " ok, split " ; + else + std::cout << " no, split "; + std::cout << splits_.at(i) << "; nelements "<< nelems_per_split.at(i)<< std::endl; + } + std::cout << std::endl; + } + +} + +bool trainDataGenerator::tdHasRaggedDimension(const trainData& td)const{ + for(const auto& sv: td.featureShapes()) + for(const auto& s:sv) + if(s<0) + return true; + for(const auto& sv: td.truthShapes()) + for(const auto& s:sv) + if(s<0) + return true; + for(const auto& sv: td.weightShapes()) + for(const auto& s:sv) + if(s<0) + return true; + return false; +} + +bool trainDataGenerator::lastBatch()const{ + return batchcount_ >= npossiblebatches_ -1 ; +} + +bool trainDataGenerator::isEmpty()const{ + return batchcount_ >= splits_.size(); +} + +void trainDataGenerator::prepareNextEpoch(){ + + //prepare for next epoch, pre-read first file + if(readthread_){ + readthread_->join(); //this is slow! 
FIXME: better way to exit gracefully in a simple way + delete readthread_; + + } + buffer_store.clear(); + buffer_read.clear(); + filecount_=0; + nsamplesprocessed_=0; + batchcount_=0; + lastbatchsize_=0; + lastbuffersplit_=0; + nextreadIdx_ = shuffle_indices_.at(filecount_); + nextread_ = orig_infiles_.at(nextreadIdx_); + filecount_++; + readthread_ = new std::thread(&trainDataGenerator::readBuffer,this); + +} + +void trainDataGenerator::end(){ + if(readthread_){ + readthread_->join(); //this is slow! FIXME: better way to exit gracefully in a simple way + delete readthread_; + readthread_=0; + } +} + +void trainDataGenerator::clear(){ + end(); + orig_infiles_.clear(); + shuffle_indices_.clear(); + sub_shuffle_indices_.clear(); + orig_rowsplits_.clear(); + splits_.clear(); + usebatch_.clear(); + randomcount_=0; + + //batchsize_ keep batch size + //sqelementslimit_ keep + //skiplargebatches_ keep + buffer_store.clear(); + buffer_read.clear(); + + filecount_=0; + nbatches_=0; + ntotal_=0; + nsamplesprocessed_=0; + lastbatchsize_=0; + lastbuffersplit_=0; + // filetimeout_ keep + batchcount_=0; +} + +trainData trainDataGenerator::getBatch(){ + return prepareBatch(); +} + +trainData trainDataGenerator::prepareBatch(){ + if(isEmpty()){ + std::cout << "trainDataGenerator::prepareBatch: batchcount " << batchcount_ << ", available: " << splits_.size() << std::endl; + throw std::runtime_error("trainDataGenerator::prepareBatch: asking for more batches than in dataset"); + } + + size_t bufferelements=buffer_store.nElements(); + size_t expect_batchelements = splits_.at(batchcount_); + bool usebatch = true; + + if(!expect_batchelements)//sanity check + throw std::runtime_error("trainDataGenerator::prepareBatch: expected elements zero!"); + + if(usebatch_.size()) + usebatch = usebatch_.at(batchcount_); + + if(debuglevel>2) + std::cout << "expect_batchelements "<join(); + delete readthread_; + readthread_=0; + } + if(lastbuffersplit_) + if(lastbuffersplit_ != buffer_store.nElements()){ + buffer_store = buffer_store.getSlice(lastbuffersplit_,buffer_store.nElements());//cut the front part + buffer_store.append(buffer_read); + } + else{ //was used completely + buffer_store = buffer_read;//std::move(buffer_read); //possible opt. 
implement move for trainData fully + } + else{ //first one + buffer_store.append(buffer_read);//std::move(buffer_read); + } + buffer_read.clear(); + bufferelements = buffer_store.nElements(); + lastbuffersplit_=0; + + if(debuglevel>2) + std::cout << "nprocessed " << nsamplesprocessed_ << " file " << filecount_ << " in buffer " << bufferelements + << " file read " << nextread_ << " totalfiles " << orig_infiles_.size() + << " total events "<< ntotal_<< std::endl; + + if(nsamplesprocessed_ + bufferelements < ntotal_){ + if (filecount_ >= orig_infiles_.size()){ + std::cout << "trainDataGenerator::prepareBatch: filecount: "<< filecount_ <<" infiles "<< orig_infiles_.size()<< + " processed: "<< nsamplesprocessed_ << " buffer: "<< bufferelements << " total "<< ntotal_ << std::endl; + throw std::runtime_error( + "trainDataGenerator::prepareBatch: more file reads requested than batches in the sample"); + + } + + nextreadIdx_ = shuffle_indices_.at(filecount_); + nextread_ = orig_infiles_.at(nextreadIdx_); + + if(debuglevel>0) + std::cout << "start new read on file "<< nextread_ <2) + std::cout << "providing batch " << nsamplesprocessed_ << "-" << nsamplesprocessed_+expect_batchelements << + ", slice " << lastbuffersplit_-expect_batchelements << "-" << lastbuffersplit_ << + "\nelements in buffer before: " << bufferelements << + "\nsplitting at " << expect_batchelements << " use this batch "<< usebatch + << " total elements " << thisbatch.nTotalElements() << " elements left in buffer " << buffer_store.nElements()<< std::endl; + + if(debuglevel>3){ + int dbpcount=0; + for(const auto& s: buffer_store.featureArray(0).rowsplits()){ + std::cout << s << ", "; + if(dbpcount>50)break; + dbpcount++; + } + std::cout << std::endl; + } + + nsamplesprocessed_+=expect_batchelements; + lastbatchsize_ = expect_batchelements; + + batchcount_++; + if(! 
usebatch){//until valid batch + return prepareBatch(); + } + + return thisbatch; + +} + +}//ns diff --git a/compiled/src/version.cpp b/compiled/src/version.cpp new file mode 100644 index 0000000..1c63fce --- /dev/null +++ b/compiled/src/version.cpp @@ -0,0 +1,31 @@ +/* + * version.cpp + * + * Created on: 8 Feb 2021 + * Author: jkiesele + */ + +#include "../interface/version.h" +#include + +bool warning_issued=false; + +bool checkVersionCompatible(const float& version){ + bool compatprevious = version == DJCDATAVERSION_COMPAT; + if(compatprevious && !warning_issued){ + std::cout + << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n" + << "WARNING:\n" + << "You are reading an old DeepJetCore file format ("< +#include "TTree.h" +#include "math.h" +#include +#include + + + +class dataGenerator{ +public: + + + dataGenerator():size_(32),type_(0),rand_(new TRandom3(123)){setSize(size_);} + ~dataGenerator(){delete rand_;} + + void gen(); + + void setSize(int s){ + size_=s; + image_.resize(size_,std::vector(size_)); + + xcoords_.clear(); + ycoords_.clear(); + for(float x=0;x<(float)size_;x++){ + for(float y=0;y<(float)size_;y++){ + ycoords_.push_back(y); + xcoords_.push_back(x); + } + } + + } + int getSize()const{return size_;} + + void setType(int t){ + type_=t; + } + + const std::vector& getXCoords()const{return xcoords_;} + const std::vector& getYCoords()const{return ycoords_;} + + const std::vector > & getImage()const{return image_;} + + std::vector getImageSeq()const; + //not protected + std::vector > addImage(const std::vector >&, const std::vector >&); + + std::vector > divideImage(const std::vector >&, const std::vector >&); + std::vector divideImage(const std::vector &, const std::vector &); + + std::vector addImage(const std::vector&, const std::vector&); + + +private: + + std::vector > makeImage(float xc, float yc, float xw, float yw, float scale)const; + + + std::vector > image_; + std::vector xcoords_,ycoords_; + int size_; + int type_; + TRandom3 * rand_; + +}; + + +int main(int argc, char* argv[]){ + int nevents=500; + int nfiles=10; + int ntest=1; + + if(argc>1) + nevents=atoi(argv[1]); + if(argc>2) + nfiles=atoi(argv[2]); + if(argc>3) + ntest=atoi(argv[3]); + + + dataGenerator gen; + + std::ofstream outtxtfile("train_files.txt"); + std::ofstream testouttxtfile("test_files.txt"); + + + for(int i=0;i imagetot; + std::vector * imagetotp = &imagetot; + t->Branch("image",&imagetotp); + + std::vector > imagetot2d; + std::vector > * imagetot2dp = &imagetot2d; + t->Branch("image2d",&imagetot2dp); + + std::vector sigfrac; + std::vector * sigfracp = &sigfrac; + t->Branch("sigfrac",&sigfracp); + + std::vector > sigfrac2d; + std::vector > * sigfrac2dp = &sigfrac2d; + t->Branch("sigfrac2d",&sigfrac2dp); + + std::vector xcoords; + std::vector * xcoordsp = &xcoords; + t->Branch("xcoords",&xcoordsp); + + std::vector ycoords; + std::vector * ycoordsp = &ycoords; + t->Branch("ycoords",&ycoords); + + float sigsum=0; + t->Branch("sigsum",&sigsum); + + int size; + t->Branch("size",&size); + int isA,isB,isC; + t->Branch("isA",&isA); + t->Branch("isB",&isB); + t->Branch("isC",&isC); + size = gen.getSize(); + + xcoords = gen.getXCoords(); + ycoords = gen.getYCoords(); + + + int type=0; + + for(size_t e=0;e<(size_t)nevents;e++){ + + gen.setType(type+1); + if(type==0){ + isA=1;isB=0;isC=0; + } + else if(type==1){ + isA=0;isB=1;isC=0; + } + else if(type==2){ + isA=0;isB=0;isC=1; + } + + type++; + if(type>2) + type=0; + + gen.gen(); + + auto s = gen.getImageSeq(); + sigsum=0; + for(const auto& sc:s) + 
sigsum+=sc; + + auto s2d = gen.getImage(); + auto bg = gen.getImageSeq(); + auto bg2d = gen.getImage(); + + imagetot = gen.addImage(s,bg); + + imagetot2d = gen.addImage(s2d,bg2d); + + sigfrac2d = gen.divideImage(s2d,imagetot2d); + + sigfrac = gen.divideImage(s,imagetot); + + t ->Fill(); + } + t->Write(); + f.Close(); + //delete t; + } + + outtxtfile.close(); +} + + + + + +void dataGenerator::gen(){ + + float xlow=0.45; + float xhi = 0.55; + float ylow = 0.45; + float yhi = 0.55; + float xw = 0.25*rand_->Uniform(0.95,1.05); + float yw = 0.25*rand_->Uniform(0.95,1.05); + //class 0 + + if(type_==1){ //class 1 + xw = rand_->Uniform(0.08,0.15); + yw = 0.5*xw*rand_->Uniform(0.95,1.05); + + //for testing + // xhi = 0.75; + // xlow = 0.6; + } + else if(type_==2){ //class 2 + yw = rand_->Uniform(0.1,0.15); + xw = 1.2*yw*rand_->Uniform(0.95,1.05); + + // xhi = 0.4; + // xlow = 0.25; + } + else if(type_>2){ + // xlow=-5; + // xhi = 5; + // ylow = -5; + // yhi = 5; + xw = rand_->Uniform(3,4); + yw = rand_->Uniform(2,4); + } + + float xc = rand_->Uniform(xlow,xhi); + float yc = rand_->Uniform(ylow,yhi); + + float scale = rand_->Uniform(0.1,3.); + + image_ = makeImage(xc,yc,xw,yw,scale); + +} + + +std::vector > dataGenerator::makeImage(float xc, float yc, float xw, float yw, float scale)const{ + + //to 'size' coordinates + xc = (float)size_ * xc; + yc = (float)size_ * yc; + xw = (float)size_ * xw; + yw = (float)size_ * yw; + + std::vector > out=image_; + + for(size_t x=0;x > dataGenerator::addImage(const std::vector >& a, const std::vector >& b){ + auto out = a; + + for(size_t i=0;i dataGenerator::addImage(const std::vector& a, const std::vector & b){ + auto out = a; + + for(size_t i=0;i > dataGenerator::divideImage(const std::vector >& a, const std::vector >& b){ + auto out = a; + for(size_t i=0;i dataGenerator::divideImage(const std::vector &a, const std::vector &b){ + auto out = a; + for(size_t i=0;i dataGenerator::getImageSeq()const{ + std::vector out; + for(const auto& x:image_) + for(const auto& y:x) + out.push_back(y); + return out; +} + + + + + + diff --git a/compiled/to_bin/make_example_data_overlap.cpp b/compiled/to_bin/make_example_data_overlap.cpp new file mode 100644 index 0000000..863d7c3 --- /dev/null +++ b/compiled/to_bin/make_example_data_overlap.cpp @@ -0,0 +1,326 @@ +/* + * make_example_data.cpp + * + * Created on: 30 Apr 2019 + * Author: jkiesele + */ + +#include "TRandom3.h" +#include "TFile.h" +#include +#include "TTree.h" +#include "math.h" +#include +#include + + + +class dataGenerator{ +public: + + + dataGenerator(int seed=0):size_(24),type_(0),rand_(new TRandom3(seed)){setSize(size_);} + ~dataGenerator(){delete rand_;} + + void gen(); + + void setSize(int s){ + size_=s; + image_.resize(size_,std::vector(size_)); + + xcoords_.clear(); + ycoords_.clear(); + for(float x=0;x<(float)size_;x++){ + for(float y=0;y<(float)size_;y++){ + ycoords_.push_back(y); + xcoords_.push_back(x); + } + } + + } + int getSize()const{return size_;} + + void setType(int t){ + type_=t; + } + + const std::vector& getXCoords()const{return xcoords_;} + const std::vector& getYCoords()const{return ycoords_;} + + const std::vector > & getImage()const{return image_;} + + std::vector getImageSeq()const; + //not protected + std::vector > addImage(const std::vector >&, const std::vector >&); + + std::vector > divideImage(const std::vector >&, const std::vector >&); + std::vector divideImage(const std::vector &, const std::vector &); + + std::vector addImage(const std::vector&, const std::vector&); + + +private: + 
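+ // generates one blob image: (xc,yc) is the blob centre and (xw,yw) its widths,
+ // all given as fractions of the image and rescaled to pixel ('size') coordinates
+ // inside; 'scale' is an overall normalisation drawn in gen() per class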
+ std::vector > makeImage(float xc, float yc, float xw, float yw, float scale)const; + + + std::vector > image_; + std::vector xcoords_,ycoords_; + int size_; + int type_; + TRandom3 * rand_; + +}; + + +int main(int argc, char* argv[]){ + int nevents=500; + int nfiles=10; + int ntest=1; + int seed=0;//also indicates starting counter + + if(argc>1) + nevents=atoi(argv[1]); + if(argc>2) + nfiles=atoi(argv[2]); + if(argc>3) + ntest=atoi(argv[3]); + if(argc>4) + seed=atoi(argv[4]); + + + dataGenerator gen(seed); + + TString add=""; + if(seed) + add+=seed; + std::ofstream outtxtfile((add+"train_files.txt").Data()); + std::ofstream testouttxtfile((add+"test_files.txt").Data()); + + int counter=seed; + for(int i=0;i imagetot; + std::vector * imagetotp = &imagetot; + t->Branch("image",&imagetotp); + + std::vector > imagetot2d; + std::vector > * imagetot2dp = &imagetot2d; + t->Branch("image2d",&imagetot2dp); + + std::vector sigfrac; + std::vector * sigfracp = &sigfrac; + t->Branch("sigfrac",&sigfracp); + + std::vector > sigfrac2d; + std::vector > * sigfrac2dp = &sigfrac2d; + t->Branch("sigfrac2d",&sigfrac2dp); + + std::vector xcoords; + std::vector * xcoordsp = &xcoords; + t->Branch("xcoords",&xcoordsp); + + std::vector ycoords; + std::vector * ycoordsp = &ycoords; + t->Branch("ycoords",&ycoords); + + float sigsum=0; + t->Branch("sigsum",&sigsum); + + int size; + t->Branch("size",&size); + int isA,isB,isC; + t->Branch("isA",&isA); + t->Branch("isB",&isB); + t->Branch("isC",&isC); + size = gen.getSize(); + + xcoords = gen.getXCoords(); + ycoords = gen.getYCoords(); + + + int type=0; + + for(size_t e=0;e<(size_t)nevents;e++){ + /* + gen.setType(type); + if(type==0){ + isA=1;isB=0;isC=0; + } + else if(type==1){ + isA=0;isB=1;isC=0; + } + else if(type==2){ + isA=0;isB=0;isC=1; + } + + type++; + if(type>2) + type=0; +*/ + + + ///testing + gen.setType(1); + gen.gen(); + ///testing + + auto s = gen.getImageSeq(); + sigsum=0; + for(const auto& sc:s) + sigsum+=sc; + + auto s2d = gen.getImage(); + gen.setType(2); + gen.gen(); + auto bg = gen.getImageSeq(); + auto bg2d = gen.getImage(); + + imagetot = gen.addImage(s,bg); + + imagetot2d = gen.addImage(s2d,bg2d); + + sigfrac2d = gen.divideImage(s2d,imagetot2d); + + sigfrac = gen.divideImage(s,imagetot); + + t ->Fill(); + } + t->Write(); + f.Close(); + //delete t; + } + + outtxtfile.close(); +} + + + + + +void dataGenerator::gen(){ + + float xlow=0.45; + float xhi = 0.55; + float ylow = 0.45; + float yhi = 0.55; + float xw = 0.25*rand_->Uniform(0.95,1.05); + float yw = 0.25*rand_->Uniform(0.95,1.05); + //class 0 + + if(type_==1){ //class 1 + xw = rand_->Uniform(0.08,0.15); + yw = 0.5*xw*rand_->Uniform(0.95,1.05); + + //for testing + xhi = 0.7; + xlow = 0.53; + } + else if(type_==2){ //class 2 + yw = rand_->Uniform(0.1,0.15); + xw = 1.2*yw*rand_->Uniform(0.95,1.05); + + xhi = 0.47; + xlow = 0.3; + } + else if(type_>2){ + xlow=-5; + xhi = 5; + ylow = -5; + yhi = 5; + xw = rand_->Uniform(3,4); + yw = rand_->Uniform(2,4); + } + + float xc = rand_->Uniform(xlow,xhi); + float yc = rand_->Uniform(ylow,yhi); + + float scale = rand_->Uniform(0.1,3.); + + image_ = makeImage(xc,yc,xw,yw,scale); + +} + + +std::vector > dataGenerator::makeImage(float xc, float yc, float xw, float yw, float scale)const{ + + //to 'size' coordinates + xc = (float)size_ * xc; + yc = (float)size_ * yc; + xw = (float)size_ * xw; + yw = (float)size_ * yw; + + std::vector > out=image_; + + for(size_t x=0;x > dataGenerator::addImage(const std::vector >& a, const std::vector >& b){ + auto out = a; + + 
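+ // combine the two images pixel by pixel (main() uses this to overlay the
+ // signal blob onto the background image before computing sigfrac)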
for(size_t i=0;i dataGenerator::addImage(const std::vector& a, const std::vector & b){ + auto out = a; + + for(size_t i=0;i > dataGenerator::divideImage(const std::vector >& a, const std::vector >& b){ + auto out = a; + for(size_t i=0;i dataGenerator::divideImage(const std::vector &a, const std::vector &b){ + auto out = a; + for(size_t i=0;i dataGenerator::getImageSeq()const{ + std::vector out; + for(const auto& x:image_) + for(const auto& y:x) + out.push_back(y); + return out; +} + + + + + + diff --git a/compiled/to_bin/testpipeline.cpp b/compiled/to_bin/testpipeline.cpp new file mode 100644 index 0000000..4f245b1 --- /dev/null +++ b/compiled/to_bin/testpipeline.cpp @@ -0,0 +1,99 @@ + + +#include +#include "../interface/quicklzWrapper.h" +#include "../interface/simpleArray.h" +#include "../interface/trainData.h" + +#include "../interface/trainDataGenerator.h" + +std::ostream& operator<< (std::ostream& os, std::vector v){ + for(const auto& i:v) + os<< i <<" "; + return os; +} + +std::ostream& operator<< (std::ostream& os, std::vector v){ + for(const auto& i:v) + os<< i <<" "; + return os; +} + +using namespace djc; + +int main(){ + +#ifdef igonorefownow + bool write=true; + if(write){ + trainData bigtd; + + auto fidx = bigtd.addFeatureArray({1000, 400, 10}); + for(size_t i=0;i test; + std::vector > fs, ts, ws; + std::cout << "reading" << std::endl; + test.readShapesFromFile("bigfile1.djctd",fs,ts,ws); + + std::cout << fs.size() << " " << ts.size() << " " << ws.size() << std::endl; + for(const auto& s: fs) + std::cout << s << std::endl; + for(const auto& s: ts) + std::cout << s << std::endl; + + + + + std::vector filenames = {"bigfile1.djctd", + "bigfile2.djctd", "bigfile3.djctd","bigfile4.djctd", + "bigfile1.djctd", + "bigfile2.djctd", "bigfile3.djctd","bigfile4.djctd", + "bigfile1.djctd", + "bigfile2.djctd", "bigfile3.djctd","bigfile4.djctd"}; + + + trainDataGenerator gen; + + size_t batchsize=100; + + std::cout << "set files" <=3.6,<3.8 + - pip + - boost + - root + # - root=6.22.6=py36heda87ca_0 + - h5py<3 + - jupyter + - numba + - numpy + - tensorflow-gpu + #- tensorflow-gpu=2.2.0=h0d30ee6_0 + - xrootd + - pip: + - awkward0 + - matplotlib + - pandas + - cython + - Pillow + - root-numpy + - scipy + - scikit-learn + - seaborn + - setgpu + - tqdm + - uproot3 diff --git a/conda_env_exact.yml b/conda_env_exact.yml new file mode 100644 index 0000000..73f9288 --- /dev/null +++ b/conda_env_exact.yml @@ -0,0 +1,280 @@ +name: DJC2 +channels: + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=1_gnu + - _tflow_select=2.1.0=gpu + - absl-py=0.11.0=py36h5fab9bb_0 + - afterimage=1.21=ha9998ff_1003 + - aiohttp=3.7.3=py36h1d69622_0 + - argon2-cffi=20.1.0=py36h1d69622_2 + - astor=0.8.1=pyh9f0ad1d_0 + - astunparse=1.6.3=py_0 + - async-timeout=3.0.1=py_1000 + - async_generator=1.10=py_0 + - attrs=20.3.0=pyhd3deb0d_0 + - backports=1.0=py_2 + - backports.functools_lru_cache=1.6.1=py_0 + - binutils=2.35.1=hdd6e379_0 + - binutils_impl_linux-64=2.35.1=h17ad2fc_0 + - binutils_linux-64=2.35=hc3fd857_29 + - bleach=3.2.1=pyh9f0ad1d_0 + - blinker=1.4=py_1 + - boost=1.74.0=py36h8e82bdb_2 + - boost-cpp=1.74.0=h9359b55_0 + - brotlipy=0.7.0=py36he6145b8_1001 + - bzip2=1.0.8=h7f98852_4 + - c-ares=1.17.1=h36c2ea0_0 + - c-compiler=1.1.3=h7f98852_0 + - ca-certificates=2020.12.5=ha878542_0 + - cachetools=4.1.1=py_0 + - cairo=1.16.0=h9f066cc_1006 + - certifi=2020.12.5=py36h5fab9bb_0 + - cffi=1.14.4=py36hc120d54_1 + - cfitsio=3.470=hb418390_7 + - chardet=3.0.4=py36h9880bd3_1008 + 
- click=7.1.2=pyh9f0ad1d_0 + - compilers=1.1.3=ha770c72_0 + - cryptography=3.2.1=py36h6ec43e4_0 + - cudatoolkit=10.1.243=h036e899_6 + - cudnn=7.6.5=cuda10.1_0 + - cupti=10.1.168=0 + - cxx-compiler=1.1.3=h4bd325d_0 + - davix=0.7.6=hb44b51d_0 + - decorator=4.4.2=py_0 + - defusedxml=0.6.0=py_0 + - entrypoints=0.3=pyhd8ed1ab_1003 + - expat=2.2.9=he1b5a44_2 + - fftw=3.3.8=nompi_h8cb7ab2_1114 + - fontconfig=2.13.1=h7e3eb15_1002 + - fortran-compiler=1.1.3=h1990efc_0 + - freetype=2.10.4=h7ca028e_0 + - fribidi=1.0.10=h36c2ea0_0 + - ftgl=2.4.0=hbcb1f35_0 + - gast=0.3.3=py_0 + - gcc_impl_linux-64=9.3.0=h28f5a38_17 + - gcc_linux-64=9.3.0=h7247604_29 + - gdk-pixbuf=2.42.0=h0536704_1 + - gettext=0.19.8.1=h0b5b191_1005 + - gfortran_impl_linux-64=9.3.0=h2bb4189_17 + - gfortran_linux-64=9.3.0=ha1c937c_29 + - giflib=5.2.1=h36c2ea0_2 + - gl2ps=1.4.2=h0708190_0 + - glew=2.1.0=h9c3ff4c_2 + - glib=2.66.3=h9c3ff4c_1 + - google-auth=1.23.0=pyhd8ed1ab_0 + - google-auth-oauthlib=0.4.1=py_2 + - google-pasta=0.2.0=pyh8c360ce_0 + - graphite2=1.3.13=h58526e2_1001 + - graphviz=2.42.3=h0511662_0 + - grpcio=1.34.0=py36h8e87921_0 + - gsl=2.6=hf94e986_0 + - gxx_impl_linux-64=9.3.0=h53cdd4c_17 + - gxx_linux-64=9.3.0=h0d07fa4_29 + - h5py=2.10.0=nompi_py36ha233d17_105 + - harfbuzz=2.7.2=ha5b49bf_1 + - hdf5=1.10.6=nompi_h6a2412b_1112 + - icu=67.1=he1b5a44_0 + - idna=2.10=pyh9f0ad1d_0 + - idna_ssl=1.1.0=py36h9f0ad1d_1001 + - importlib-metadata=3.1.1=pyhd8ed1ab_0 + - importlib_metadata=3.1.1=hd8ed1ab_0 + - ipykernel=5.3.4=py36he448a4c_1 + - ipyparallel=6.3.0=py36h9f0ad1d_1 + - ipython=5.8.0=py36_1 + - ipython_genutils=0.2.0=py_1 + - jinja2=2.11.2=pyh9f0ad1d_0 + - jpeg=9d=h36c2ea0_0 + - jsonschema=3.2.0=py_2 + - jupyter_client=6.1.7=py_0 + - jupyter_core=4.7.0=py36h5fab9bb_0 + - jupyterlab_pygments=0.1.2=pyh9f0ad1d_0 + - keras-preprocessing=1.1.0=py_0 + - kernel-headers_linux-64=2.6.32=h77966d4_13 + - krb5=1.17.2=h926e7f8_0 + - ld_impl_linux-64=2.35.1=hed1e6ac_0 + - libblas=3.9.0=3_openblas + - libcblas=3.9.0=3_openblas + - libcurl=7.71.1=hcdd3856_8 + - libcxx=11.0.0=h0efe328_1 + - libcxxabi=11.0.0=ha770c72_1 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=h516909a_1 + - libffi=3.3=h58526e2_1 + - libgcc-devel_linux-64=9.3.0=hfd08b2a_17 + - libgcc-ng=9.3.0=h5dbcf3e_17 + - libgfortran-ng=9.3.0=he4bcb1c_17 + - libgfortran5=9.3.0=he4bcb1c_17 + - libglib=2.66.3=h1f3bc88_1 + - libglu=9.0.0=he1b5a44_1001 + - libgomp=9.3.0=h5dbcf3e_17 + - libiconv=1.16=h516909a_0 + - liblapack=3.9.0=3_openblas + - libllvm10=10.0.1=he513fc3_3 + - libnghttp2=1.41.0=h8cfc5f6_2 + - libopenblas=0.3.12=pthreads_h4812303_1 + - libpng=1.6.37=h21135ba_2 + - libprotobuf=3.14.0=h780b84a_0 + - librsvg=2.50.2=h3442318_1 + - libsodium=1.0.18=h36c2ea0_1 + - libssh2=1.9.0=hab1572f_5 + - libstdcxx-devel_linux-64=9.3.0=h4084dd6_17 + - libstdcxx-ng=9.3.0=h2ae2ef3_17 + - libtiff=4.1.0=h4f3a223_6 + - libtool=2.4.6=h58526e2_1007 + - libuuid=2.32.1=h7f98852_1000 + - libwebp-base=1.1.0=h36c2ea0_3 + - libxcb=1.13=h14c3975_1002 + - libxml2=2.9.10=h68273f3_2 + - llvmlite=0.35.0=py36h05121d2_0 + - lz4-c=1.9.2=he1b5a44_3 + - markdown=3.3.3=pyh9f0ad1d_0 + - markupsafe=1.1.1=py36he6145b8_2 + - metakernel=0.27.5=pyhd3deb0d_0 + - mistune=0.8.4=py36h1d69622_1002 + - multidict=4.7.5=py36h8c4c3a4_2 + - nbclient=0.5.1=py_0 + - nbconvert=6.0.7=py36h5fab9bb_3 + - nbformat=5.0.8=py_0 + - ncurses=6.2=h58526e2_4 + - nest-asyncio=1.4.3=pyhd8ed1ab_0 + - notebook=6.1.5=py36h5fab9bb_0 + - numba=0.52.0=py36h284efc9_0 + - numpy=1.19.4=py36h8732dcd_1 + - oauthlib=3.0.1=py_0 + - 
openssl=1.1.1h=h516909a_0 + - opt_einsum=3.3.0=py_0 + - packaging=20.7=pyhd3deb0d_0 + - pandoc=2.11.2=h36c2ea0_0 + - pandocfilters=1.4.2=py_1 + - pango=1.42.4=h69149e4_5 + - pcre=8.44=he1b5a44_0 + - pexpect=4.8.0=pyh9f0ad1d_2 + - pickleshare=0.7.5=py_1003 + - pip=20.3.1=pyhd8ed1ab_0 + - pixman=0.40.0=h36c2ea0_0 + - portalocker=1.7.0=py36h9f0ad1d_1 + - prometheus_client=0.9.0=pyhd3deb0d_0 + - prompt_toolkit=1.0.15=py_1 + - protobuf=3.14.0=py36hc4f0c31_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - ptyprocess=0.6.0=py_1001 + - pyasn1=0.4.8=py_0 + - pyasn1-modules=0.2.7=py_0 + - pycparser=2.20=pyh9f0ad1d_2 + - pygments=2.7.2=py_0 + - pyjwt=1.7.1=py_0 + - pyopenssl=20.0.0=pyhd8ed1ab_0 + - pyparsing=2.4.7=pyh9f0ad1d_0 + - pyrsistent=0.17.3=py36h1d69622_1 + - pysocks=1.7.1=py36h9880bd3_2 + - pythia8=8.244=py36hc4f0c31_2 + - python=3.6.11=hffdb5ce_3_cpython + - python-dateutil=2.8.1=py_0 + - python_abi=3.6=1_cp36m + - pyzmq=20.0.0=py36h2b1bd32_1 + - readline=8.0=he28a2e2_2 + - requests=2.25.0=pyhd3deb0d_0 + - requests-oauthlib=1.3.0=pyh9f0ad1d_0 + - root=6.22.6=py36heda87ca_0 + - root-binaries=6.22.6=py36heda87ca_0 + - root-dependencies=6.22.6=py36heda87ca_0 + - root_base=6.22.6=py36h74e3adb_0 + - rsa=4.6=pyh9f0ad1d_0 + - send2trash=1.5.0=py_0 + - simplegeneric=0.8.1=py_1 + - six=1.15.0=pyh9f0ad1d_0 + - sqlite=3.34.0=h74cdb3f_0 + - sysroot_linux-64=2.12=h77966d4_13 + - tbb=2020.2=hc9558a2_0 + - tbb-devel=2020.2=hc9558a2_0 + - tensorboard=2.4.0=pyhd8ed1ab_0 + - tensorboard-plugin-wit=1.7.0=pyh9f0ad1d_0 + - tensorflow=2.2.0=gpu_py36hf933387_0 + - tensorflow-base=2.2.0=gpu_py36h8a81be8_0 + - tensorflow-estimator=2.2.0=pyh95af2a2_0 + - tensorflow-gpu=2.2.0=h0d30ee6_0 + - termcolor=1.1.0=py_2 + - terminado=0.9.1=py36h5fab9bb_1 + - testpath=0.4.4=py_0 + - tk=8.6.10=hed695b0_1 + - tornado=6.1=py36h1d69622_0 + - traitlets=4.3.3=py36h9f0ad1d_1 + - typing-extensions=3.7.4.3=0 + - typing_extensions=3.7.4.3=py_0 + - urllib3=1.25.11=py_0 + - vdt=0.4.3=h9c3ff4c_0 + - wcwidth=0.2.5=pyh9f0ad1d_2 + - webencodings=0.5.1=py_1 + - werkzeug=1.0.1=pyh9f0ad1d_0 + - wheel=0.36.1=pyhd3deb0d_0 + - wrapt=1.12.1=py36h1d69622_2 + - xorg-fixesproto=5.0=h14c3975_1002 + - xorg-kbproto=1.0.7=h14c3975_1002 + - xorg-libice=1.0.10=h516909a_0 + - xorg-libsm=1.2.3=h84519dc_1000 + - xorg-libx11=1.6.12=h516909a_0 + - xorg-libxau=1.0.9=h14c3975_0 + - xorg-libxcursor=1.2.0=h516909a_0 + - xorg-libxdmcp=1.1.3=h516909a_0 + - xorg-libxext=1.3.4=h516909a_0 + - xorg-libxfixes=5.0.3=h516909a_1004 + - xorg-libxft=2.3.3=h71203ad_0 + - xorg-libxpm=3.5.13=h516909a_0 + - xorg-libxrender=0.9.10=h516909a_1002 + - xorg-libxt=1.1.5=h516909a_1003 + - xorg-renderproto=0.11.1=h14c3975_1002 + - xorg-xextproto=7.3.0=h14c3975_1002 + - xorg-xproto=7.0.31=h7f98852_1007 + - xrootd=5.0.3=py36h4ad382e_0 + - xxhash=0.8.0=h7f98852_1 + - xz=5.2.5=h516909a_1 + - yarl=1.6.3=py36h1d69622_0 + - zeromq=4.3.3=h58526e2_3 + - zipp=3.4.0=py_0 + - zlib=1.2.11=h516909a_1010 + - zstd=1.4.5=h6597ccf_2 + - pip: + - awkward0==0.15.1 + - blessings==1.7 + - cached-property==1.5.2 + - cycler==0.10.0 + - cython==0.29.21 + - easydict==1.9 + - future==0.18.2 + - gensim==3.8.3 + - gpustat==0.6.0 + - imageio==2.9.0 + - ipywidgets==7.5.1 + - joblib==0.17.0 + - jupyter==1.0.0 + - jupyter-console==5.2.0 + - kiwisolver==1.3.1 + - matplotlib==3.3.3 + - networkx==2.5 + - nvidia-ml-py3==7.352.0 + - opencv-python==4.4.0.46 + - pandas==1.1.4 + - pillow==8.0.1 + - psutil==5.7.3 + - pytz==2020.4 + - pywavelets==1.1.1 + - qtconsole==5.0.1 + - qtpy==1.9.0 + - root-numpy==4.8.0 + - scikit-image==0.17.2 + - 
scikit-learn==0.23.2 + - scipy==1.5.4 + - seaborn==0.11.0 + - setgpu==0.0.7 + - setuptools==51.0.0 + - smart-open==4.0.1 + - threadpoolctl==2.1.0 + - tifffile==2020.9.3 + - tqdm==4.54.1 + - uproot==4.0.0 + - uproot3==3.14.1 + - uproot3-methods==0.10.0 + - widgetsnbextension==3.5.1 diff --git a/conversion/__init__.py b/conversion/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/conversion/conversion.py b/conversion/conversion.py new file mode 100644 index 0000000..a24a0ee --- /dev/null +++ b/conversion/conversion.py @@ -0,0 +1,13 @@ +try: + import datastructures + from datastructures import * +except ImportError: + print('datastructure modules not found. Please define a DeepJetCore submodule') + +class_options=[] +import inspect, sys +for name, obj in inspect.getmembers(sys.modules['datastructures']): + if inspect.isclass(obj) and 'TrainData' in name: + class_options.append(obj) + +class_options = dict((str(i).split("'")[1].split('.')[-1], i) for i in class_options) diff --git a/customObjects.py b/customObjects.py new file mode 100644 index 0000000..604d321 --- /dev/null +++ b/customObjects.py @@ -0,0 +1,40 @@ + +from DeepJetCore.DJCLosses import * +from DeepJetCore.DJCLayers import * +import imp +try: + imp.find_module('Losses') + from Losses import * +except ImportError as e: + print ('No Losses module found, ignoring at your own risk. The following error occured:') + print(e) + print('//////////////////') + global_loss_list = {} + +try: + imp.find_module('Layers') + from Layers import * +except ImportError as e: + print ('No Layers module found, ignoring at your own risk. The following error occured:') + print(e) + print('//////////////////') + global_layers_list = {} + +try: + imp.find_module('Metrics') + from Metrics import * +except ImportError as e: + print ('No metrics module found, ignoring at your own risk. 
The following error occured:') + print(e) + print('//////////////////') + global_metrics_list = {} + +def get_custom_objects(): + + custom_objs = {} + custom_objs.update(djc_global_loss_list) + custom_objs.update(djc_global_layers_list) + custom_objs.update(global_loss_list) + custom_objs.update(global_layers_list) + custom_objs.update(global_metrics_list) + return custom_objs diff --git a/dataPipeline.py b/dataPipeline.py new file mode 100644 index 0000000..c9969e0 --- /dev/null +++ b/dataPipeline.py @@ -0,0 +1,78 @@ + +from DeepJetCore.compiled.c_trainDataGenerator import trainDataGenerator +import numpy as np + +class TrainDataGenerator(trainDataGenerator): + + def __init__(self, + pad_rowsplits=False, + fake_truth=None, + dict_output=False, + cast_to = None): + + trainDataGenerator.__init__(self) + #self.extend_truth_list_by = extend_truth_list_by + self.pad_rowsplits=pad_rowsplits + self.dict_output = dict_output + self.fake_truth = None + self.cast_to = cast_to + if fake_truth is not None: + if isinstance(fake_truth, int): + self.fake_truth = [np.array([0],dtype='float32') + for _ in range(fake_truth)] + elif isinstance(fake_truth, list): + etl={} + for e in fake_truth: + if isinstance(e,str): + etl[e]=np.array([0],dtype='float32') + else: + raise ValueError("TrainDataGenerator: only accepts an int or list of strings to extend truth list") + self.fake_truth = etl + + def feedTrainData(self): + for _ in range(self.getNBatches()): + td = self.getBatch() + if self.cast_to is not None: + td.__class__ = self.cast_to + yield td + + def feedNumpyData(self): + + fnames=[] + tnames=[] + wnames=[] + + for b in range(self.getNBatches()): + try: + data = self.getBatch() + + if not len(fnames): + fnames = data.getNumpyFeatureArrayNames() + tnames = data.getNumpyTruthArrayNames() + wnames = data.getNumpyWeightArrayNames() + + # These calls will transfer data to numpy and delete the respective SimpleArray + # instances for efficiency. + # therefore extracting names etc needs to happen before! 
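+ # i.e. do not query getNumpy*ArrayNames() after the transfer calls below:
+ # once the data has been moved to numpy, the underlying SimpleArrays (and the
+ # names stored with them) are gone, so the dict_output mapping could not be built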
+ xout = data.transferFeatureListToNumpy(self.pad_rowsplits) + yout = data.transferTruthListToNumpy(self.pad_rowsplits) + wout = data.transferWeightListToNumpy(self.pad_rowsplits) + + if self.dict_output: + xout = {k:v for k,v in zip(fnames,xout)} + yout = {k:v for k,v in zip(tnames,yout)} + wout = {k:v for k,v in zip(wnames,wout)} + + if self.fake_truth is not None: + yout=self.fake_truth + + out = (xout,yout) + if len(wout)>0: + out = (xout,yout,wout) + yield out + except Exception as e: + print("TrainDataGenerator: an exception was raised in batch",b," out of ", self.getNBatches(),', expection: ', e) + raise e + + def feedTorchTensors(self): + pass \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..03f8bdf --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,97 @@ +ARG BASE_IMAGE_TAG +FROM cernml4reco/djcbase:$BASE_IMAGE_TAG + +#update if needed +RUN pip3 install --upgrade pip +RUN apt-get update --fix-missing + +########### torch stuff: pretty slow so do that first + +#moved from base +RUN pip3 install future gensim jupyter prompt-toolkit +EXPOSE 8888 + +# removed torch (geometric) for now, as nobody seems to use it +# RUN pip3 install torch==1.10.2 +# RUN pip3 install install torch-scatter torch-sparse torch-cluster \ +# torch-spline-conv torch-geometric \ +# -f https://data.pyg.org/whl/torch-1.10.2+cu111.html +# + + +RUN apt-get install -y vim + +RUN pip3 install bayesian-optimization mgzip mysql-connector-python pyjet pyyaml + +################################################################################ +# Tensorflow + +# TF needs gcc7 for custom ops +# RUN apt-get install -y gcc-7 g++-7 && \ +# update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 7 && \ +# update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-7 7 +RUN g++ --version && gcc --version + + +RUN export DEBIAN_FRONTEND=noninteractive && \ + pip3 --no-cache-dir install tensorflow-gpu==2.4.4 + +RUN cd /usr/local/lib/python3.6/dist-packages/tensorflow/include/third_party && \ + mkdir gpus && \ + cd gpus && \ + ln -s /usr/local/cuda-11.1 cuda + +#fix for TF 2.4.0 +RUN ln -s /usr/local/cuda-11.1/lib64/libcusolver.so.11 /usr/local/cuda-11.1/lib64/libcusolver.so.10 +ENV LD_LIBRARY_PATH="/usr/local/cuda-11.1/lib64:${LD_LIBRARY_PATH}" + + +############# some more useful packages, mostly requested by users + + + +################################################################################ +# the actual DeepJetCore. 
Make sure this gets built every time + +# prepare the environment (only starting py >3.6) +# RUN cd /usr/lib/x86_64-linux-gnu &&\ +# ln -s libboost_numpy38.so libboost_numpy3.so && \ +# ln -s libboost_python38.so libboost_python3.so + + +ARG BUILD_DATE +LABEL org.label-schema.build-date=$BUILD_DATE +ARG COMMIT +LABEL djc.commit=$COMMIT + +ENV DEEPJETCORE /usr/share/DJC/DeepJetCore + +RUN ldconfig && \ + cd /usr/share && \ + mkdir DJC && \ + cd DJC && \ + git clone https://github.com/DL4Jets/DeepJetCore && \ + cd DeepJetCore && git checkout $COMMIT + + +RUN cd $DEEPJETCORE &&\ + source docker_env.sh && \ + cd compiled && \ + make + + +ENV PYTHONPATH="/usr/share/DJC/DeepJetCore/../:${PYTHONPATH}" +ENV LD_LIBRARY_PATH="/usr/share/DJC/DeepJetCore/compiled:${LD_LIBRARY_PATH}" +ENV PATH="/usr/share/DJC/DeepJetCore/bin:${PATH}" + + +RUN cd /usr/share/DJC/DeepJetCore/testing/unit && \ + python3 test.py + +### ad hoc additions to be moved towards base image in the future +RUN pip3 install hist + + + + + diff --git a/docker/Dockerfile_base b/docker/Dockerfile_base new file mode 100644 index 0000000..be0a89a --- /dev/null +++ b/docker/Dockerfile_base @@ -0,0 +1,73 @@ +FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04 + +SHELL ["/bin/bash", "-c"] + +RUN sed -i "s,# deb http://archive.canonical.com/ubuntu,deb http://archive.canonical.com/ubuntu,g" /etc/apt/sources.list +RUN apt-get update + +RUN apt-get install -y python3.6-dev python3-pip +RUN pip3 install --upgrade pip +RUN pip3 install --upgrade setuptools +RUN ldconfig + +#basic user tools +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get install -y git wget nano emacs \ + evince eog ffmpeg unzip zsh python3-tk locales \ + htop + +#add some kerberos for convenience +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get install -y krb5-user + +#use the standard cern config +COPY krb5.conf /etc/krb5.conf + +############ root part +RUN apt-get install -y libx11-dev libxpm-dev libxft-dev libxext-dev +RUN apt-get install -y cmake openssl + +# RUN mkdir /temproot && \ +# cd /temproot && \ +# git clone --branch v6-20-00-patches http://github.com/root-project/root.git +# RUN cd /temproot && \ +# mkdir build && \ +# cd build && \ +# cmake ../root +# RUN cd /temproot/build && \ +# make -j5 && \ +# make install && \ +# rm -rf /temproot + +RUN mkdir /temproot && \ + cd /temproot && \ + wget https://cern.ch/amadio/root/root_6.22.00_ubuntu18_amd64.deb > /dev/null 2>&1 + +RUN cd /temproot && \ + apt-get install -y ./root_6.22.00_ubuntu18_amd64.deb && \ + rm -rf /temproot + + +############ END root part + +### boost +RUN apt-get install -y libboost-all-dev + + +#for singularity panic +RUN adduser --disabled-password --gecos "" dummyuser +RUN apt-get install -y vim + +#some tex things +RUN apt install -y texlive ghostscript dvipng + +############ PIP packages + +RUN pip3 install numpy scikit-learn scikit-image \ + h5py matplotlib uproot3 uproot Pillow \ + scipy seaborn opencv-python easydict tqdm Cython numba \ + gpustat setGPU plotly dash awkward + +# Add root_numpy warning +RUN mkdir -p /usr/local/lib/python3.6/dist-packages/root_numpy && \ + echo "raise ImportError(\"WARNING: root_numpy was removed from the DeepJetCore container as it has not been maintained for a while and is not comaptible with python > 3.7. 
Please switch to uproot or similar.\")" > /usr/local/lib/python3.6/dist-packages/root_numpy/__init__.py diff --git a/docker/build.sh b/docker/build.sh new file mode 100644 index 0000000..acc3e04 --- /dev/null +++ b/docker/build.sh @@ -0,0 +1,10 @@ +#!/usr/bin/bash + +BASE_IMAGE_TAG=cu11.6 + +COMMIT=manual + + +docker build $FORCE_NO_CACHE -t cernml4reco/deepjetcore3:latest . \ + --build-arg BUILD_DATE="$(date)" --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG \ + --build-arg COMMIT=$COMMIT \ No newline at end of file diff --git a/docker/build_if_needed.sh b/docker/build_if_needed.sh new file mode 100755 index 0000000..1e97b4a --- /dev/null +++ b/docker/build_if_needed.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +FORCE="no" +if [ $1 ] +then +FORCE=$1 +fi + +git fetch +if [ $(git rev-parse HEAD) != $(git rev-parse @{u}) ] || [ $FORCE != "no" ] +then + + + COMMIT=$(git log -n 1 --pretty=format:"%H") + + echo "building container for commit ${COMMIT}" + + OLD_BASE_ID=$(git rev-parse HEAD:docker/Dockerfile_base) + OLD_ID=$(git rev-parse HEAD:docker/Dockerfile) + git pull + NEW_BASE_ID=$(git rev-parse HEAD:docker/Dockerfile_base) + NEW_ID=$(git rev-parse HEAD:docker/Dockerfile) + + source image_tags.sh #in case this was updated in the pull + + BASE_IMAGE_TAG="${BASE_IMAGE_TAG}" # as this is a bleeding edge build + + if [ $OLD_BASE_ID != $NEW_BASE_ID ] || [ $FORCE == "force_base" ] + then + echo "base image changed from ${OLD_BASE_ID} to ${NEW_BASE_ID}, rerunning base build" + docker build --no-cache=true -t cernml4reco/djcbase:$BASE_IMAGE_TAG -f Dockerfile_base . > base_build.log 2>&1 + + if [ $? != 0 ]; + then + BASE_FAIL=true + else + docker push cernml4reco/djcbase:$BASE_IMAGE_TAG > base_push.log 2>&1 + if [ $? != 0 ]; + then + BASE_PUSH_FAIL=true + fi + fi + + subject="Subject: base build ${BASE_IMAGE_TAG} finished" + if [ $BASE_FAIL ] + then + subject="Subject: !! base build FAILED" + fi + if [ $BASE_PUSH_FAIL ] + then + subject="Subject: !! base push FAILED" + fi + + { echo $subject ; + cat base_build.log ; + echo "" ; + echo "################# push log ##############" ; + echo "" ; + cat base_push.log ; } | sendmail jkiesele@cern.ch; + + FORCE_NO_CACHE=" --no-cache=true " + + fi + + if [ $BASE_FAIL ] || [ $BASE_PUSH_FAIL ] + then + exit + fi + + # this is an auto build, so by definition, the build is not a release build + # if the docker file changed, tag it as experimental and ask + TAG=latest + if [ $OLD_ID != $NEW_ID ] + then + TAG=exp + fi + + echo "Building with tag ${TAG}" > build.log + + # only force no cache if base image has been rebuilt + docker build $FORCE_NO_CACHE -t cernml4reco/deepjetcore3:$TAG . \ + --build-arg BUILD_DATE="$(date)" --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG \ + --build-arg COMMIT=$COMMIT >> build.log 2>&1 + if [ $? != 0 ]; + then + FAIL=true + else + + docker push cernml4reco/deepjetcore3:$TAG > push.log 2>&1 + + if [ $? != 0 ]; + then + PUSH_FAIL=true + fi + fi + + subject="Subject: build ${TAG} finished" + if [ $FAIL ] + then + subject="Subject: !! DJC build FAILED" + fi + if [ $PUSH_FAIL ] + then + subject="Subject: !! 
DJC push FAILED" + if [ $OLD_ID != $NEW_ID ] + then + subject="Subject: DJC experimental push failed (${TAG})" + fi + fi + + { echo $subject ; + cat build.log ; + echo "" ; + echo "################# push log ##############" ; + echo "" ; + cat push.log ; } | sendmail jkiesele@cern.ch; + + +fi diff --git a/docker/image_tags.sh b/docker/image_tags.sh new file mode 100644 index 0000000..599c38f --- /dev/null +++ b/docker/image_tags.sh @@ -0,0 +1,4 @@ + +BASE_IMAGE_TAG=cu11.1_djc3.3.X +TAG=3.3.0 +#TAG=latest \ No newline at end of file diff --git a/docker/krb5.conf b/docker/krb5.conf new file mode 100644 index 0000000..5416539 --- /dev/null +++ b/docker/krb5.conf @@ -0,0 +1,88 @@ +; AD : This Kerberos configuration is for CERN's Active Directory realm +; The line above this is magic and is used by cern-config-keytab. Do +; not remove. + +; Installed with puppet from a series of +; template fragments. + +; /etc/krb5.conf + +[libdefaults] + default_realm = CERN.CH + ticket_lifetime = 25h + renew_lifetime = 120h + forwardable = true + proxiable = true + default_tkt_enctypes = arcfour-hmac-md5 aes256-cts aes128-cts des3-cbc-sha1 des-cbc-md5 des-cbc-crc + chpw_prompt = true + rdns = true + +[appdefaults] +pam = { + external = true + krb4_convert = false + krb4_convert_524 = false + krb4_use_as_req = false +} + +[domain_realm] +geonosis.cern.ch = CERN.CH +.cern.ch = CERN.CH +.fnal.gov = FNAL.GOV +.hep.man.ac.uk = HEP.MAN.AC.UK +.in2p3.fr = IN2P3.FR + +[realms] +CERN.CH = { + default_domain = cern.ch + kpasswd_server = cerndc.cern.ch + admin_server = cerndc.cern.ch + dns_lookup_kdc = false + master_kdc = cerndc.cern.ch + kdc = cerndc.cern.ch + v4_name_convert = { + host = { + rcmd = host + } + } +} + +FNAL.GOV = { + default_domain = fnal.gov + admin_server = krb-fnal-admin.fnal.gov + kdc = krb-fnal-fcc3.fnal.gov:88 + kdc = krb-fnal-2.fnal.gov:88 + kdc = krb-fnal-3.fnal.gov:88 + kdc = krb-fnal-1.fnal.gov:88 + kdc = krb-fnal-4.fnal.gov:88 + kdc = krb-fnal-enstore.fnal.gov:88 + kdc = krb-fnal-fg2.fnal.gov:88 + kdc = krb-fnal-cms188.fnal.gov:88 + kdc = krb-fnal-cms204.fnal.gov:88 + kdc = krb-fnal-d0online.fnal.gov:88 +} + +HEP.MAN.AC.UK = { + default_domain = hep.man.ac.uk + kpasswd_server = afs4.hep.man.ac.uk + admin_server = afs4.hep.man.ac.uk + kdc = afs1.hep.man.ac.uk + kdc = afs2.hep.man.ac.uk + kdc = afs3.hep.man.ac.uk + kdc = afs4.hep.man.ac.uk +} + +IN2P3.FR = { + default_domain = in2p3.fr + kpasswd_server = kerberos-admin.in2p3.fr + admin_server = kerberos-admin.in2p3.fr + kdc = kerberos-1.in2p3.fr + kdc = kerberos-2.in2p3.fr + kdc = kerberos-3.in2p3.fr +} + +KFKI.HU = { + admin_server = kerberos.kfki.hu + kdc = kerberos.kfki.hu +} + diff --git a/docker_env.sh b/docker_env.sh new file mode 100644 index 0000000..37513bc --- /dev/null +++ b/docker_env.sh @@ -0,0 +1,9 @@ +export DEEPJETCORE=`pwd` +export PATH=`pwd`/bin:$PATH +export PYTHONPATH=`pwd`/../:$PYTHONPATH +if [ $LD_LIBRARY_PATH ] +then + export LD_LIBRARY_PATH=`pwd`/compiled/:$LD_LIBRARY_PATH +else + export LD_LIBRARY_PATH=`pwd`/compiled/ +fi diff --git a/environment/README.md b/environment/README.md deleted file mode 100644 index 78a58c6..0000000 --- a/environment/README.md +++ /dev/null @@ -1,83 +0,0 @@ -Help to setup the environment on your machine -============================================= - -On the gpu, only do: - -``` -source gpu_env.sh -``` -If you have installed miniconda on lxplus, you need to remove its directory from your PATH environment! - - -On Lxplus/Mac, you need to install miniconda in your workspace see [1]. 
- -After that it is sufficient to run (zsh, bash, sh): - -``` -source env.sh (Mac) -source lxplus_env.sh (Lxplus) -``` - -In addition, the compiled modules need to be compiled. -This cannot be done directly on the GPU machines. please compile on lxplus and use the compiled libraries on the GPU -After sourcing the environment scripts, run 'make' in the 'modules' directory. - - - -[1] -The code is tested and run using package management with anaconda or miniconda: -https://www.continuum.io/anaconda-overview -On lxplus, miniconda is recommended, since it needs less disk space! -Please make sure, conda is added to your path (you will be prompted). Answer with "yes" or take care yourself -that the command 'which conda' returns the path of your conda installation before you use the package. - -If you installed anaconda/miniconda, you can use the .conda file to install the version we used. -The setupEnv.sh is a small macro that does the installation and environment definition. -Please call: - - ./setupEnv.sh deepjetLinux3.conda - -if you have GPU access also do: - - ./setupEnv.sh deepjetLinux3.conda gpu - -Each time before running, the environment should be activated and the PYTHONPATH needs to be adapted. -This can be easily done for zsh/bash/sh shells with - -``` -source lxplus_env (lxplus) -source gpu_env (GPU) -``` - -The script needs to be called from this directory - -The first time this package is checked out or if compiled modules changed, please run -``` -make -``` -in the ```modules``` directory on lxplus7 - -If the lxplus installation fails -================================ -(the following should not be necessary anymore, but it is left for reference) - -For unknown reasons the lxplus environment installation may fail with the following error: - -```text -Traceback (most recent call last): - File "/afs/cern.ch/work/m/mverzett/miniconda3/envs/deepjetLinux/bin/pip", line 4, in - import pip -ImportError: No module named 'pip' -``` - -To overcome this you can try to adapt and run the following commands, **they have only been tested to work on lxplus7**: -``` -#remove the env that was created -rm -rf /afs/cern.ch/work//$USER/miniconda3/envs/deepjetLinux3 -conda create --name deepjetLinux3 -source activate deepjetLinux3 -conda install pip -source deactivate deepjetLinux3 -conda install --name deepjetLinux --file deepjetLinux3.conda -pip install -r deepjetLinux3.pip -``` diff --git a/environment/activateROOT.sh b/environment/activateROOT.sh deleted file mode 100644 index 9db0bbc..0000000 --- a/environment/activateROOT.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# hack to make root-numpy and ROOT work properly in python -# this is necessary because the original package that is checked -# out by conda is broken - -cd ${CONDA_PREFIX} -source bin/thisroot.sh -cd - - -echo "Activate: ROOT has been sourced. Environment settings are ready. 
" -echo "ROOTSYS="${ROOTSYS} - -if [ -n "${LD_LIBRARY_PATH}" ]; then - unset LD_LIBRARY_PATH -fi - - -if [ -n "${DYLD_LIBRARY_PATH}" ]; then - unset DYLD_LIBRARY_PATH -fi - - diff --git a/environment/deepjetLinux3.conda b/environment/deepjetLinux3.conda deleted file mode 100644 index 260dfd7..0000000 --- a/environment/deepjetLinux3.conda +++ /dev/null @@ -1,57 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-64 -@EXPLICIT -https://repo.continuum.io/pkgs/free/linux-64/boost-1.61.0-py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/cloog-0.18.0-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/dbus-1.10.10-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/expat-2.1.0-0.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/fftw-3.3.4-2.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/fontconfig-2.12.1-3.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/freetype-2.5.5-2.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/gcc-4.8.2-25.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/glib-2.43.0-1.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/glibc-2.12.2-3.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/gmp-5.1.2-3.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/graphviz-2.38.0-4.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/gsl-1.16-2.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/gst-plugins-base-1.8.0-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/gstreamer-1.8.0-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/hdf5-1.8.17-1.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/icu-54.1-0.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/isl-0.12.2-2.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/jbig-2.1-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/jpeg-9b-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/libffi-3.2.1-1.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/libgcc-5.2.0-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/libgfortran-3.0.0-1.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/libiconv-1.14-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/libpng-1.6.27-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/libtiff-4.0.6-3.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/libxcb-1.12-1.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/libxml2-2.9.4-0.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/linux-headers-2.6.32-1.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/mkl-2017.0.1-0.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/mpc-1.0.1-2.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/mpfr-3.1.2-2.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/numexpr-2.6.2-np113py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/numpy-1.13.1-py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/openssl-1.0.1k-1.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/pcre-8.37-5.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/pip-9.0.1-py27_1.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/pytables-3.3.0-np113py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/python-2.7.5-3.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/qt-5.6.2-3.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/readline-6.2-2.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/root-6.04-py2.7_gcc4.8.2.tar.bz2 -https://conda.anaconda.org/NLeSC/linux-64/root-numpy-4.4.0-root6.04_py2.7.tar.bz2 
-https://repo.continuum.io/pkgs/free/linux-64/scikit-learn-0.18.2-np113py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/scipy-0.19.1-np113py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/setuptools-27.2.0-py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/six-1.10.0-py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/sqlite-3.13.0-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/system-5.8-2.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/tk-8.5.18-0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/wheel-0.29.0-py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/xz-5.2.2-1.tar.bz2 -https://repo.continuum.io/pkgs/free/linux-64/zlib-1.2.8-3.tar.bz2 diff --git a/environment/deepjetLinux3.pip b/environment/deepjetLinux3.pip deleted file mode 100644 index 3aa84a2..0000000 --- a/environment/deepjetLinux3.pip +++ /dev/null @@ -1,15 +0,0 @@ -cycler==0.10.0 -funcsigs==1.0.2 -functools32==3.2.3.post2 -h5py==2.6.0 -tensorflow==1.0.1 -Keras==2.0.0 -matplotlib==2.0.0 -mock==2.0.0 -pbr==2.0.0 -protobuf==3.2.0 -pyparsing==2.2.0 -python-dateutil==2.6.0 -pytz==2016.10 -PyYAML==3.12 -subprocess32==3.2.7 diff --git a/environment/deepjetOSX.conda b/environment/deepjetOSX.conda deleted file mode 100644 index dad6c82..0000000 --- a/environment/deepjetOSX.conda +++ /dev/null @@ -1,42 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: osx-64 -@EXPLICIT -https://repo.continuum.io/pkgs/free/osx-64/boost-1.61.0-py27_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/ca-certificates-2017.08.26-ha1e5d58_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/certifi-2018.1.18-py27_0.tar.bz2 -https://conda.anaconda.org/NLeSC/osx-64/fftw-3.3.4-0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/freetype-2.8-h12048fb_1.tar.bz2 -https://conda.anaconda.org/NLeSC/osx-64/gsl-1.16-2.tar.bz2 -https://repo.continuum.io/pkgs/free/osx-64/hdf5-1.8.17-2.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/icu-58.2-h4b95b61_1.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/intel-openmp-2018.0.0-h8158457_8.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/krb5-1.14.2-h9a779f2_6.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/libcxx-4.0.1-h579ed51_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/libcxxabi-4.0.1-hebd6815_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/libedit-3.1-hb4e282d_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/libffi-3.2.1-h475c297_4.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/libiconv-1.15-hdd342a3_7.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/libpng-1.6.34-he12f830_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/libxml2-2.9.7-hab757c2_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/mkl-2017.0.4-h1fae6ae_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/ncurses-6.0-hd04f020_2.tar.bz2 -https://repo.continuum.io/pkgs/free/osx-64/numexpr-2.6.2-np113py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/osx-64/numpy-1.13.1-py27_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/openssl-1.0.2n-hdbc3d79_0.tar.bz2 -https://conda.anaconda.org/NLeSC/osx-64/pcre-8.35-7.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/pip-9.0.1-py27h1567d89_4.tar.bz2 -https://repo.continuum.io/pkgs/free/osx-64/pytables-3.4.2-np113py27_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/python-2.7.13-h89fad4f_16.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/readline-7.0-hc1231fa_4.tar.bz2 
-https://conda.anaconda.org/NLeSC/osx-64/root-6.04-py2.7_clang503.tar.bz2 -https://conda.anaconda.org/NLeSC/osx-64/root-numpy-4.4.0-root6.04_py2.7.tar.bz2 -https://repo.continuum.io/pkgs/free/osx-64/scikit-learn-0.18.2-np113py27_0.tar.bz2 -https://repo.continuum.io/pkgs/free/osx-64/scipy-0.19.1-np113py27_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/setuptools-38.4.0-py27_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/six-1.11.0-py27h7252ba3_1.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/sqlite-3.22.0-h3efe00b_0.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/tk-8.6.7-h35a86e2_3.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/wheel-0.30.0-py27h677a027_1.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/xz-5.2.3-h0278029_2.tar.bz2 -https://repo.continuum.io/pkgs/main/osx-64/zlib-1.2.11-hf3cbc9b_2.tar.bz2 diff --git a/environment/deepjetOSX.pip b/environment/deepjetOSX.pip deleted file mode 100644 index 3aa84a2..0000000 --- a/environment/deepjetOSX.pip +++ /dev/null @@ -1,15 +0,0 @@ -cycler==0.10.0 -funcsigs==1.0.2 -functools32==3.2.3.post2 -h5py==2.6.0 -tensorflow==1.0.1 -Keras==2.0.0 -matplotlib==2.0.0 -mock==2.0.0 -pbr==2.0.0 -protobuf==3.2.0 -pyparsing==2.2.0 -python-dateutil==2.6.0 -pytz==2016.10 -PyYAML==3.12 -subprocess32==3.2.7 diff --git a/environment/setupEnv.sh b/environment/setupEnv.sh deleted file mode 100755 index f7654a0..0000000 --- a/environment/setupEnv.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -if [ ! `which conda` ] -then -echo Please install the anaconda package manager -exit 1 -fi - -if [ ! $1 ] -then - echo "please specify an environment file" - exit -fi - -addstring="" - -if [[ $2 == "gpu" ]] -then - echo "setting up for gpu usage" - addstring="_${2}" -fi - - - -envfile=$1 -envname="${envfile%.*}${addstring}" -pipfile="${envfile%.*}.pip" - -conda create --copy --name $envname python=2.7.13 -conda install --name $envname --file $envfile - - -source activate $envname -pip install -r $pipfile - -#conda install scikit-learn -#conda install numpy #to update packages. fast bugfix. make a new conda list later - -cp activateROOT.sh $CONDA_PREFIX/etc/conda/activate.d/activateROOT.sh - -if [ $addstring ] -then - pip install --ignore-installed --upgrade https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.0.1-cp27-none-linux_x86_64.whl - pip install setGPU -fi - -echo "environment set up. 
Please activate it with \"source activate ${envname}\"" - diff --git a/evaluation/__init__.py b/evaluation/__init__.py index 8abba1e..992ec08 100644 --- a/evaluation/__init__.py +++ b/evaluation/__init__.py @@ -4,7 +4,7 @@ from pdb import set_trace #gather all the files here -modules = [basename(i.replace('.py','')) for i in glob('%s/[A-Za-z]*.py' % dirname(__file__))] +modules = ["DeepJetCore.evaluation."+basename(i.replace('.py','')) for i in glob('%s/[A-Za-z]*.py' % dirname(__file__))] __all__ = [] structure_list=[] for module_name in modules: diff --git a/evaluation/evaluation.py b/evaluation/evaluation.py index a526e95..7dbf543 100644 --- a/evaluation/evaluation.py +++ b/evaluation/evaluation.py @@ -19,9 +19,14 @@ # # -from __future__ import print_function -colormap=['red' +colormap=[ + "#5e3c99", + "#e66101", + "#fdb863", + "#b2abd2", + + 'red' , 'blue' , 'darkgreen' , 'purple' @@ -31,7 +36,12 @@ , 'darkpurple' , 'gray'] -dashedcolormap=['red','red,dashed' +dashedcolormap=[ + "#5e3c99","#5e3c99,dashed", + "#e66101","#e66101,dashed", + "#fdb863","#fdb863,dashed", + "#b2abd2","#b2abd2,dashed", + 'red','red,dashed' , 'blue','blue,dashed' , 'darkgreen','darkgreen,dashed' , 'purple','purple,dashed' @@ -42,138 +52,18 @@ , 'gray','gray,dashed'] from pdb import set_trace -from DeepJetCore.compiled import c_storeTensor - - -class testDescriptor(object): - - def __init__(self, addnumpyoutput=False): - self.__sourceroots=[] - self.__predictroots=[] - self.metrics=[] - self.use_only = [] - self.addnumpyoutput=addnumpyoutput - - def makePrediction(self, model, testdatacollection, outputDir, - ident='', store_labels = False, monkey_class=''): - import numpy as np - from root_numpy import array2root - import os - monkey_class_obj = None - if monkey_class: - module, classname = tuple(monkey_class.split(':')) - _temp = __import__(module, globals(), locals(), [classname], -1) - monkey_class_obj = getattr(_temp, classname) - - outputDir=os.path.abspath(outputDir) - - if len(ident)>0: - ident='_'+ident - - self.__sourceroots=[] - self.__predictroots=[] - self.metrics=[] - - fullnumpyarray=np.array([]) - - for i in range(len(testdatacollection.samples)): - sample=testdatacollection.samples[i] - originroot=testdatacollection.originRoots[i] - outrootfilename=os.path.splitext(os.path.basename(originroot))[0]+'_predict'+ident+'.root' - - fullpath=testdatacollection.getSamplePath(sample) - if monkey_class_obj is not None: - testdatacollection.dataclass = monkey_class_obj() - td=testdatacollection.dataclass - - td.readIn(fullpath) - - if hasattr(td, 'customlabels'): - import copy - formatstring=copy.deepcopy(td.customlabels) - - else: - truthclasses=td.getUsedTruth() - formatstring=[] - if len(truthclasses)>0 and len(truthclasses[0])>0: - formatstring = ['prob_%s%s' % (i, ident) for i in truthclasses] - regressionclasses=[] - if hasattr(td, 'regressiontargetclasses'): - regressionclasses=td.regressiontargetclasses - #new implementation. 
Please check with the store_labels option, Mauro - formatstring.extend(['reg_%s%s' % (i, ident) for i in regressionclasses]) - - features=td.x - labels=td.y - weights=td.w[0] - - - - prediction = model.predict(features) - if self.use_only: - prediction = [prediction[i] for i in self.use_only] - if isinstance(prediction, list): - all_write = np.concatenate(prediction, axis=1) - else: - all_write = prediction - if all_write.ndim == 2: - all_write = np.concatenate([all_write, weights], axis=1) - formatstring.append('weight') - if not all_write.shape[1] == len(formatstring): - print(formatstring, ' vs ', all_write.shape[1]) - raise ValueError('Prediction output does not match with the provided targets!') - - all_write = np.core.records.fromarrays(np.transpose(all_write), names= ','.join(formatstring)) - array2root(all_write,outputDir+'/'+outrootfilename,"tree",mode="recreate") - - #self.metrics.append(metric) - self.__sourceroots.append(originroot) - self.__predictroots.append(outputDir+'/'+outrootfilename) - print(formatstring) - print('\ncreated prediction friend tree '+outputDir+'/'+outrootfilename+ ' for '+originroot) - if self.addnumpyoutput: - if len(fullnumpyarray): - fullnumpyarray=np.concatenate((fullnumpyarray,all_write)) - else: - fullnumpyarray=np.array(all_write) - else: - c_storeTensor.store(np.ascontiguousarray(all_write, dtype=np.float32).ctypes.data, list(np.shape(all_write)), outputDir+'/'+outrootfilename) - self.__sourceroots.append(originroot) - self.__predictroots.append(outputDir+'/'+outrootfilename) - if self.addnumpyoutput: - if len(fullnumpyarray): - fullnumpyarray=np.concatenate((fullnumpyarray,all_write)) - else: - fullnumpyarray=np.array(all_write) - - if self.addnumpyoutput: - np.save(outputDir+'/'+'allprediction.npy', fullnumpyarray) - - - def writeToTextFile(self, outfile): - ''' - Very simple text file output to use when creating chains with friends. - Format: - source0.root prediction0.root - source1.root prediction1.root - ... 
- ''' - listifle=open(outfile,'w') - for i in range(len(self.__predictroots)): - listifle.write(self.__sourceroots[i]+' '+self.__predictroots[i]+'\n') - listifle.close() def makeASequence(arg,length): - isseq=(not hasattr(arg, "strip") and - hasattr(arg, "__getitem__") or - hasattr(arg, "__iter__")) + isseq=((not hasattr(arg, "strip")) and + (hasattr(arg, "__getitem__") or + hasattr(arg, "__iter__"))) out=[] if isseq: if len(arg)==length: return arg - for i in range(length/len(arg)): + for i in range(int(length/len(arg))): out.extend(arg) else: for i in range(length): @@ -213,12 +103,18 @@ def createColours(colors_list,name_list,nnames=None,extralegend=[]): def makeROCs_async(intextfile, name_list, probabilities_list, truths_list, vetos_list, colors_list, outpdffile, cuts='',cmsstyle=False, firstcomment='',secondcomment='', invalidlist='', - extralegend=None, + extralegend=None, #['solid?udsg','hatched?c']) logY=True, individual=False, xaxis="", + yaxis="", nbins=200, - treename='deepntuplizer/tree'):#['solid?udsg','hatched?c']): + treename='deepntuplizer/tree', + xmin=-1, + experimentlabel="",lumilabel="",prelimlabel="", + npoints=500, + yscales=1., + no_friend_tree=False): import copy @@ -242,7 +138,7 @@ def makeROCs_async(intextfile, name_list, probabilities_list, truths_list, vetos colors_list=createColours(colors_list,namelistcopy,nnames,extralegcopy) - + #check if multi-input file files=makeASequence(intextfile,len(namelistcopy)) @@ -253,6 +149,7 @@ def makeROCs_async(intextfile, name_list, probabilities_list, truths_list, vetos vetos_list=makeASequence(vetos_list,len(namelistcopy)) invalidlist=makeASequence(invalidlist,len(namelistcopy)) + yscaleslist = makeASequence(yscales,len(namelistcopy)) from DeepJetCore.compiled import c_makeROCs @@ -268,7 +165,8 @@ def worker(): outpdffile,allcuts,cmsstyle, firstcomment,secondcomment, invalidlist,extralegcopy,logY, - individual,xaxis,nbins,treename) + individual,xaxis,yaxis,nbins,treename,xmin, + experimentlabel,lumilabel,prelimlabel,yscaleslist,no_friend_tree) except Exception as e: print('error for these inputs:') @@ -278,6 +176,7 @@ def worker(): print(truths_list) print(vetos_list) print(invalidlist) + print(yscaleslist) raise e @@ -326,7 +225,7 @@ def worker(): def makeEffPlots_async(intextfile, name_list, variables, cutsnum,cutsden, colours, outpdffile, xaxis='',yaxis='', minimum=1e100,maximum=-1e100, - rebinfactor=1, SetLogY = False, Xmin = 100, Xmax = -100. , + nbins=-1, SetLogY = False, Xmin = 100, Xmax = -100. 
, treename="deepntuplizer/tree"): @@ -339,12 +238,12 @@ def makeEffPlots_async(intextfile, name_list, variables, cutsnum,cutsden, colour - import c_makePlots + from DeepJetCore.compiled import c_makePlots def worker(): try: c_makePlots.makeEffPlots(files_list,name_list, variables_list,cutsnum_list,cutsden_list,colours_list, - outpdffile,xaxis,yaxis,rebinfactor,SetLogY, Xmin, Xmax,minimum,maximum,treename) + outpdffile,xaxis,yaxis,nbins,SetLogY, Xmin, Xmax,minimum,maximum,treename) except Exception as e: print('error for these inputs:') print(files_list) @@ -362,6 +261,7 @@ def worker(): def make_association(txtfiles, input_branches=None, output_branches=None, limit=None): + raise ImportError("DeepJetCore.evaluation.make_association deprecated.") from root_numpy import root2array from pandas import DataFrame @@ -392,12 +292,13 @@ def association(fname): return truth, models - + + def plotLoss(infilename,outfilename,range): import matplotlib - - matplotlib.use('Agg') + #matplotlib.use('Agg') + import matplotlib.pyplot as plt infile=open(infilename,'r') trainloss=[] @@ -406,6 +307,12 @@ def plotLoss(infilename,outfilename,range): i=0 automax=0 automin=100 + nlines=0 + with open(infilename,'r') as tmpfile: + for line in tmpfile: + if len(line)<1: continue + nlines+=1 + for line in infile: if len(line)<1: continue tl=float(line.split(' ')[0]) @@ -414,13 +321,12 @@ def plotLoss(infilename,outfilename,range): valloss.append(vl) epochs.append(i) i=i+1 - if i==5: - automax=max(tl,vl) + if i - float(nlines)/2. > 1.: + automax=max(automax,tl,vl) automin=min(automin,vl,tl) - import matplotlib.pyplot as plt - f = plt.figure() + plt.plot(epochs,trainloss,'r',label='train') plt.plot(epochs,valloss,'b',label='val') plt.ylabel('loss') @@ -429,8 +335,49 @@ def plotLoss(infilename,outfilename,range): if len(range)==2: plt.ylim(range) elif automax>0: - plt.ylim([automin*0.9,automax]) - f.savefig(outfilename) + plt.ylim([automin*0.9,automax*1.1]) + #plt.show() + plt.savefig(outfilename, format='pdf') #why does this crash? + plt.close() + + +def plotBatchLoss(infilename,outfilename,range): + + import matplotlib + #matplotlib.use('Agg') + import matplotlib.pyplot as plt + + infile=open(infilename,'r') + trainloss=[] + batch=[] + i=0 + automax=0 + automin=100 + nlines=0 + with open(infilename,'r') as tmpfile: + for line in tmpfile: + if len(line)<1: continue + nlines+=1 + + for line in infile: + if len(line)<1: continue + tl=float(line.split(' ')[0]) + trainloss.append(tl) + batch.append(i) + i=i+1 + if i - float(nlines)/2. > 1.: + automax=max(automax,tl) + automin=min(automin,tl) + + + + plt.plot(batch,trainloss,'r',label='train') + plt.ylabel('loss') + plt.xlabel('batch') + plt.legend() + plt.ylim([0,6.2]) + #plt.show() + plt.savefig(outfilename) #why does this crash? 
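+    # a likely cause for the savefig crash noted above: matplotlib picking an interactive
+    # backend on a display-less node or inside a worker process; forcing the non-interactive
+    # Agg backend via matplotlib.use('Agg') before the first pyplot import is the usual fix.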
plt.close() ######### old part - keep for reference, might be useful some day diff --git a/gpu_env.sh b/gpu_env.sh index 37fdeb8..9195e33 100644 --- a/gpu_env.sh +++ b/gpu_env.sh @@ -1,6 +1,5 @@ - -source activate deepjetLinux3_gpu + source activate deepjetLinux3_gpu export DEEPJETCORE=`pwd` diff --git a/lxplus_env.sh b/lxplus_env.sh index a427d23..3ae4dd3 100644 --- a/lxplus_env.sh +++ b/lxplus_env.sh @@ -1,6 +1,8 @@ -source activate deepjetLinux3 + + source deactivate + source activate deepjetLinux3 export DEEPJETCORE=`pwd` diff --git a/modeltools.py b/modeltools.py index 40226e2..0965f35 100644 --- a/modeltools.py +++ b/modeltools.py @@ -1,5 +1,8 @@ +from DeepJetCore.customObjects import * - +custom_objs = get_custom_objects() + + def getLayer(model, name): for layer in model.layers: if layer.name == name: @@ -7,7 +10,6 @@ def getLayer(model, name): - def printLayerInfosAndWeights(model, noweights=False): for layer in model.layers: g=layer.get_config() @@ -18,9 +20,11 @@ def printLayerInfosAndWeights(model, noweights=False): def fixLayersContaining(m, fixOnlyContaining, invert=False): - isseq=(not hasattr(fixOnlyContaining, "strip") and - hasattr(fixOnlyContaining, "__getitem__") or - hasattr(fixOnlyContaining, "__iter__")) + import collections.abc + if isinstance(fixOnlyContaining, collections.abc.Sequence) and not isinstance(fixOnlyContaining, str): + isseq=True + else: + isseq=False if not isseq: fixOnlyContaining=[fixOnlyContaining] if invert: @@ -32,27 +36,91 @@ def fixLayersContaining(m, fixOnlyContaining, invert=False): m.get_layer(index=layidx).trainable=True else: for layidx in range(len(m.layers)): - for ident in fixOnlyContaining: + for ident in fixOnlyContaining: if len(ident) and ident in m.get_layer(index=layidx).name: m.get_layer(index=layidx).trainable=False return m def set_trainable(m, patterns, value): - if isinstance(patterns, basestring): - patterns = [patterns] - for layidx in range(len(m.layers)): - name = m.get_layer(index=layidx).name - if any(i in name for i in patterns): - m.get_layer(index=layidx).trainable = value - return m + if isinstance(patterns, basestring): + patterns = [patterns] + for layidx in range(len(m.layers)): + name = m.get_layer(index=layidx).name + if any(i in name for i in patterns): + m.get_layer(index=layidx).trainable = value + return m +def setAllTrainable(m, val=True): + for layidx in range(len(m.layers)): + m.get_layer(index=layidx).trainable = val + return m def loadModelAndFixLayers(filename,fixOnlyContaining): #import keras from keras.models import load_model - m=load_model(filename) + m=load_model(filename, custom_objects=custom_objs) fixLayersContaining(m, fixOnlyContaining) return m + +def load_model(filename): + from keras.models import load_model + + model=load_model(filename, custom_objects=custom_objs) + + return model + +def apply_weights_where_possible(target_model, weight_model): + + for layer_a in target_model.layers: + for layer_b in weight_model.layers: + if layer_a.name == layer_b.name: + try: + layer_a.set_weights(layer_b.get_weights()) + print('using weights from ', layer_a.name) + except: + print('unable to copy weights for layer ', layer_a.name) + #print(layer_a.weights,'\n',layer_b.weights) + + + return target_model + + + + + +################# wrappers for keras models in DJC + +import tensorflow as tf + +class DJCKerasModel(tf.keras.models.Model): + ''' + Base class to implement automatic shape feeding as in DJC + Interfaces smoothly with training_base + ''' + def __init__(self,*args,**kwargs): + + 
super(DJCKerasModel, self).__init__(*args,dynamic=False,**kwargs) + self.keras_input_shapes=None + self._is_djc_keras_model = True + + def setInputShape(self,keras_inputs): + self.keras_input_shapes=[i.shape for i in keras_inputs] + + def build(self,input_shapes): + super(DJCKerasModel,self).build(self.keras_input_shapes) + + + + + + + + + + + + + diff --git a/predict_pipeline.png b/predict_pipeline.png new file mode 100644 index 0000000..d8bb14c Binary files /dev/null and b/predict_pipeline.png differ diff --git a/preprocessing/__init__.py b/preprocessing/__init__.py index 8abba1e..e94f962 100644 --- a/preprocessing/__init__.py +++ b/preprocessing/__init__.py @@ -4,7 +4,7 @@ from pdb import set_trace #gather all the files here -modules = [basename(i.replace('.py','')) for i in glob('%s/[A-Za-z]*.py' % dirname(__file__))] +modules = ["DeepJetCore.preprocessing."+basename(i.replace('.py','')) for i in glob('%s/[A-Za-z]*.py' % dirname(__file__))] __all__ = [] structure_list=[] for module_name in modules: diff --git a/preprocessing/preprocessing.py b/preprocessing/preprocessing.py index 56fc0de..230051e 100644 --- a/preprocessing/preprocessing.py +++ b/preprocessing/preprocessing.py @@ -1,9 +1,7 @@ import numpy #import scipy #from scipy.sparse import csc_matrix -""" -author Markus stoye, A collection of tools for data pre-processing in ML for DeepJet. The basic assumption is that Tuple is a recarray where the fiels are the features. -""" + from array import array import logging @@ -541,5 +539,70 @@ def MeanNormZeroPad(Filename_in,MeanNormTuple,inbranches_listlist,nMaxslist,neve #print(array) return array +def read2DArray(filename, treename, branchname, nevents: int, xsize: int, ysize: int, + rebinx: int=1,rebiny: int=1, + zeropad=False): + + if xsize%rebinx or ysize%rebiny: + raise Exception("rebinning factors don't not match the bin counts") + + from DeepJetCore.compiled import c_arrayReads + + array = numpy.zeros((nevents,xsize//rebinx, ysize//rebiny,1) , dtype='float32') + ncut=numpy.array([0],dtype='float32') + c_arrayReads.read2DArray(array,filename, treename, branchname,rebinx,rebiny,zeropad, False, ncut) + + return array + +def readListArray(filename, treename, branchname, nevents: int, list_size: int, n_feat_per_element: int, + zeropad=False, list_size_cut=False): + + + from DeepJetCore.compiled import c_arrayReads + + n_cut = numpy.array([0],dtype='int') + + array = numpy.zeros((nevents,list_size, n_feat_per_element,1) , dtype='float32') + + c_arrayReads.read2DArray(array,filename, treename, branchname,1,1,zeropad,list_size_cut,n_cut) + + array = numpy.squeeze(array, axis=-1) + + if list_size_cut: + return array, n_cut[0] + + return array + +def read3DArray(filename, treename, branchname, nevents: int, xsize: int, ysize: int, zsize: int, + rebinx: int=1,rebiny: int=1,rebinz: int=1, + zeropad=False): + + if xsize%rebinx or ysize%rebiny or zsize%rebinz: + raise Exception("rebinning factors don't not match the bin counts") + + from DeepJetCore.compiled import c_arrayReads + + array = numpy.zeros((nevents,xsize//rebinx, ysize//rebiny,zsize//rebinz,1) , dtype='float32') + + c_arrayReads.read3DArray(array,filename, treename, branchname,rebinx,rebiny,rebinz,zeropad) + + return array - +def read4DArray(filename, treename, branchname, nevents: int, xsize: int, ysize: int, zsize: int, fsize: int, + rebinx: int=1,rebiny: int=1,rebinz: int=1,rebinf: int=1, + zeropad=False): + + if xsize%rebinx or ysize%rebiny or zsize%rebinz or fsize%rebinf: + raise Exception("rebinning factors don't not match 
the bin counts") + + from DeepJetCore.compiled import c_arrayReads + + + array = numpy.zeros((nevents,xsize//rebinx, ysize//rebiny,zsize//rebinz,fsize//rebinf,1) , dtype='float32') + + print(array.shape) + + c_arrayReads.read4DArray(array,filename, treename, branchname,rebinx,rebiny,rebinz,rebinf,zeropad) + + return array + diff --git a/testing/files/filelist.txt b/testing/files/filelist.txt new file mode 100644 index 0000000..9945fab --- /dev/null +++ b/testing/files/filelist.txt @@ -0,0 +1,4 @@ +root_file0.root +root_file1.root +root_file2.root +root_file3.root diff --git a/testing/files/root_file0.root b/testing/files/root_file0.root new file mode 100644 index 0000000..b9e50ce Binary files /dev/null and b/testing/files/root_file0.root differ diff --git a/testing/files/root_file1.root b/testing/files/root_file1.root new file mode 100644 index 0000000..967f8ed Binary files /dev/null and b/testing/files/root_file1.root differ diff --git a/testing/files/root_file2.root b/testing/files/root_file2.root new file mode 100644 index 0000000..17abb58 Binary files /dev/null and b/testing/files/root_file2.root differ diff --git a/testing/files/root_file3.root b/testing/files/root_file3.root new file mode 100644 index 0000000..aea7d5d Binary files /dev/null and b/testing/files/root_file3.root differ diff --git a/testing/files/root_file4.root b/testing/files/root_file4.root new file mode 100644 index 0000000..7cb08e3 Binary files /dev/null and b/testing/files/root_file4.root differ diff --git a/testing/files/root_file5.root b/testing/files/root_file5.root new file mode 100644 index 0000000..a1b03a4 Binary files /dev/null and b/testing/files/root_file5.root differ diff --git a/testing/files/root_file6.root b/testing/files/root_file6.root new file mode 100644 index 0000000..701c6d4 Binary files /dev/null and b/testing/files/root_file6.root differ diff --git a/testing/files/root_file7.root b/testing/files/root_file7.root new file mode 100644 index 0000000..ab9cf34 Binary files /dev/null and b/testing/files/root_file7.root differ diff --git a/testing/files/root_file8.root b/testing/files/root_file8.root new file mode 100644 index 0000000..79706f1 Binary files /dev/null and b/testing/files/root_file8.root differ diff --git a/testing/files/root_file9.root b/testing/files/root_file9.root new file mode 100644 index 0000000..0016b5f Binary files /dev/null and b/testing/files/root_file9.root differ diff --git a/testing/unit/TestCFunctions.py b/testing/unit/TestCFunctions.py new file mode 100644 index 0000000..ef24cc8 --- /dev/null +++ b/testing/unit/TestCFunctions.py @@ -0,0 +1,9 @@ +from DeepJetCore.compiled.c_testFunctions import * + +import unittest + + +class TestCFunctions(unittest.TestCase): + def test_trainDataFiller(self): + testTrainDataFileStreamer() + \ No newline at end of file diff --git a/testing/unit/TestCompatibility.py b/testing/unit/TestCompatibility.py new file mode 100644 index 0000000..26f0fb0 --- /dev/null +++ b/testing/unit/TestCompatibility.py @@ -0,0 +1,47 @@ +''' +Checks for file compatibility with (only) the previous version. 
+''' + +from DeepJetCore.TrainData import TrainData +from DeepJetCore.SimpleArray import SimpleArray +import numpy as np +import unittest + + +class TestCompatibility(unittest.TestCase): + + def test_SimpleArrayRead(self): + print('TestCompatibility SimpleArray') + a = SimpleArray() + a.readFromFile("simpleArray_previous.djcsa") + + arr = np.load("np_arr.npy") + #FIXME: this array was actually wrong + arr = arr[:100] + rs = np.load("np_rs.npy") + + b = SimpleArray(arr,rs) + + self.assertEqual(a,b) + + def test_TrainDataRead(self): + print('TestCompatibility TrainData') + td = TrainData() + td.readFromFile('trainData_previous.djctd') + + self.assertEqual(td.nFeatureArrays(), 1) + + arr = np.load("np_arr.npy") + #FIXME: this array was actually wrong + arr = arr[:100] + rs = np.load("np_rs.npy") + + b = SimpleArray(arr,rs) + + a = td.transferFeatureListToNumpy(False) + a, rs = a[0],a[1] + + a = SimpleArray(a,np.array(rs,dtype='int64')) + + self.assertEqual(a,b) + \ No newline at end of file diff --git a/testing/unit/TestSimpleArray.py b/testing/unit/TestSimpleArray.py new file mode 100644 index 0000000..d229227 --- /dev/null +++ b/testing/unit/TestSimpleArray.py @@ -0,0 +1,148 @@ +from DeepJetCore.SimpleArray import SimpleArray +import numpy as np +import unittest +import os + +class TestSimpleArray(unittest.TestCase): + + + def createNumpy(self,dtype): + arr = np.array(np.random.rand(500,3,5,6)*100., dtype=dtype) + rs = np.array([0,100,230,500],dtype='int64') + return arr, rs + + def test_createFromNumpy(self): + print('TestSimpleArray: createFromNumpy') + arr,rs = self.createNumpy('float32') + + a = SimpleArray(dtype='float32') + a.createFromNumpy(arr,rs) + + narr, nrs = a.copyToNumpy() + + diff = np.max(np.abs(narr-arr)) + diff += np.max(np.abs(nrs-rs)) + self.assertTrue(diff< 0.000001) + + def test_transferToNumpy(self): + print('TestSimpleArray: transferToNumpy') + arr,rs = self.createNumpy('float32') + a = SimpleArray(arr,rs) + narr, nrs = a.transferToNumpy() + diff = np.max(np.abs(narr-arr)) + diff += np.max(np.abs(nrs-rs)) + self.assertTrue(diff< 0.000001) + + + def test_transferToNumpyInt(self): + print('TestSimpleArray: transferToNumpyInt') + arr,rs = self.createNumpy('int32') + a = SimpleArray(arr,rs) + narr, nrs = a.transferToNumpy() + diff = np.max(np.abs(narr-arr)) + diff += np.max(np.abs(nrs-rs)) + self.assertTrue(diff< 0.000001) + + def test_createFromNumpyInt(self): + print('TestSimpleArray: createFromNumpyInt') + + arr,rs = self.createNumpy('int32') + + a = SimpleArray(dtype='int32') + a.createFromNumpy(arr,rs) + + narr, nrs = a.copyToNumpy() + diff = np.max(np.abs(narr-arr)) + diff += np.max(np.abs(nrs-rs)) + self.assertTrue(diff< 0.000001) + + def test_dynamicTypeChange(self): + print('TestSimpleArray: dynamicTypeChange') + arr,rs = self.createNumpy('int32') + name = "lala" + a = SimpleArray(dtype='float32',name=name) + fnames = ["a","b","c","d","e","f"] + a.setFeatureNames(fnames) + a.createFromNumpy(arr,rs) + self.assertTrue(a.featureNames() == fnames) + self.assertTrue(a.name() == name) + + + def test_writeRead(self): + print('TestSimpleArray: writeRead') + arr,rs = self.createNumpy('float32') + + a = SimpleArray(arr,rs) + a.setName("myname") + a.setFeatureNames(["a","b","c","d","e","f"]) + a.writeToFile("testfile.djcsa") + b = SimpleArray() + b.readFromFile("testfile.djcsa") + os.system('rm -f testfile.djcsa') + #os.system("rf -f testfile") + + ad, ars = a.copyToNumpy() + bd, brs = b.copyToNumpy() + diff = np.max(np.abs(ad-bd)) + diff += np.max(np.abs(ars-brs)) + 
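+        # the write/read round trip through the .djcsa file should be lossless,
+        # so data and row splits are required to match exactly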
self.assertTrue(diff==0) + + + def test_equal(self): + print('TestSimpleArray: equal') + arr,rs = self.createNumpy('float32') + + a = SimpleArray() + a.createFromNumpy(arr,rs) + + b = SimpleArray() + b.createFromNumpy(arr,rs) + + self.assertEqual(a, b) + + b = a.copy() + self.assertEqual(a, b) + + b.setFeatureNames(["a","b","c","d","e","f"]) + self.assertNotEqual(a, b) + + + + + def test_append(self): + print('TestSimpleArray: append') + arr,rs = self.createNumpy('float32') + + arr2,_ = self.createNumpy('float32') + + a = SimpleArray(arr,rs) + aa = SimpleArray(arr2,rs) + a.append(aa) + + arr2 = np.concatenate([arr,arr2],axis=0) + rs2 = rs.copy()[1:] + rs2 += rs[-1] + rs2 = np.concatenate([rs,rs2],axis=0) + + b = SimpleArray(arr2,rs2) + self.assertEqual(a, b) + + def test_split(self): + print('TestSimpleArray: split') + + arr,rs = self.createNumpy('float32') + a = SimpleArray(arr,rs,name="myarray") + + arrs, rss = arr[:rs[2]], rs[:3] + b = SimpleArray(arrs,rss,name="myarray") + + asplit = a.split(2) + self.assertEqual(asplit, b) + + def test_name(self): + print('TestSimpleArray: name') + arr,rs=self.createNumpy('float32') + a = SimpleArray(arr,rs) + a.setName("myname") + self.assertEqual("myname", a.name()) + diff --git a/testing/unit/TestTrainData.py b/testing/unit/TestTrainData.py new file mode 100644 index 0000000..0e7cc0d --- /dev/null +++ b/testing/unit/TestTrainData.py @@ -0,0 +1,160 @@ +from DeepJetCore.TrainData import TrainData +from DeepJetCore.SimpleArray import SimpleArray +import numpy as np +import unittest +import os + +class TestTrainData(unittest.TestCase): + + def createSimpleArray(self, dtype, length=500, shape=None): + arr = np.array(np.random.rand(length,3,5,6)*100., dtype=dtype) + rs = np.array([0,100,230,length],dtype='int64') + return SimpleArray(arr, rs) + + def sub_test_store(self, readWrite): + td = TrainData() + x,y,w = self.createSimpleArray('int32'), self.createSimpleArray('float32'), self.createSimpleArray('int32') + x_orig=x.copy() + x2,y2,_ = self.createSimpleArray('float32'), self.createSimpleArray('float32'), self.createSimpleArray('int32') + x2_orig=x2.copy() + y_orig=y.copy() + + td._store([x,x2], [y,y2], [w]) + + if readWrite: + td.writeToFile("testfile.tdjctd") + td = TrainData() + td.readFromFile("testfile.tdjctd") + os.system('rm -f testfile.tdjctd') + + shapes = td.getNumpyFeatureShapes() + self.assertEqual([[3, 5, 6], [1], [3, 5, 6], [1]], shapes,"shapes") + + self.assertEqual(2, td.nFeatureArrays()) + self.assertEqual(2, td.nTruthArrays()) + self.assertEqual(1, td.nWeightArrays()) + + f = td.transferFeatureListToNumpy(False) + t = td.transferTruthListToNumpy(False) + w = td.transferWeightListToNumpy(False) + + xnew = SimpleArray(f[0],np.array(f[1],dtype='int64')) + self.assertEqual(x_orig, xnew) + + xnew = SimpleArray(f[2],np.array(f[3],dtype='int64')) + self.assertEqual(x2_orig, xnew) + + ynew = SimpleArray(t[0],np.array(t[1],dtype='int64')) + self.assertEqual(y_orig, ynew) + + def test_store(self): + print('TestTrainData: store') + self.sub_test_store(False) + + def test_readWrite(self): + print('TestTrainData: readWrite') + self.sub_test_store(True) + + def nestedEqual(self,l,l2): + for a,b in zip(l,l2): + if not np.all(a==b): + return False + return True + + def test_AddToFile(self): + print('TestTrainData: AddToFile') + + td = TrainData() + x,y,w = self.createSimpleArray('int32'), self.createSimpleArray('float32'), self.createSimpleArray('int32') + xo,yo,wo = x.copy(),y.copy(),w.copy() + x2,y2,_ = self.createSimpleArray('float32'), 
self.createSimpleArray('float32'), self.createSimpleArray('int32') + x2o,y2o = x2.copy(),y2.copy() + td._store([x,x2], [y,y2], [w]) + + td.writeToFile("testfile.tdjctd") + td.addToFile("testfile.tdjctd") + + + td2 = TrainData() + td2._store([xo,x2o], [yo,y2o], [wo]) + td2.append(td) + + td.readFromFile("testfile.tdjctd") + os.system('rm -f testfile.tdjctd') + + + self.assertEqual(td,td2) + + def test_slice(self): + print('TestTrainData: skim') + a = self.createSimpleArray('int32',600) + b = self.createSimpleArray('float32',600) + d = self.createSimpleArray('float32',600) + + a_slice = a.getSlice(2,3) + b_slice = b.getSlice(2,3) + d_slice = d.getSlice(2,3) + + td = TrainData() + td._store([a,b], [d], []) + td_slice = td.getSlice(2,3) + + fl = td_slice.transferFeatureListToNumpy(False) + tl = td_slice.transferTruthListToNumpy(False) + a_tdslice = SimpleArray(fl[0],fl[1]) + b_tdslice = SimpleArray(fl[2],fl[3]) + d_tdslice = SimpleArray(tl[0],tl[1]) + + self.assertEqual(a_slice, a_tdslice) + self.assertEqual(b_slice, b_tdslice) + self.assertEqual(d_slice, d_tdslice) + + #test skim + td.skim(2) + fl = td.transferFeatureListToNumpy(False) + tl = td.transferTruthListToNumpy(False) + a_tdslice = SimpleArray(fl[0],fl[1]) + b_tdslice = SimpleArray(fl[2],fl[3]) + d_tdslice = SimpleArray(tl[0],tl[1]) + + self.assertEqual(a_slice, a_tdslice) + self.assertEqual(b_slice, b_tdslice) + self.assertEqual(d_slice, d_tdslice) + + + + def test_split(self): + print('TestTrainData: split') + a = self.createSimpleArray('int32') + b = self.createSimpleArray('float32',600) + c = self.createSimpleArray('int32') + d = self.createSimpleArray('float32',400) + all_orig = [a.copy(),b.copy(),c.copy(),d.copy()] + all_splitorig = [sa.split(2) for sa in all_orig] + + td = TrainData() + td._store([a,b], [c,d], []) + + + tdb = td.split(2) + f = tdb.transferFeatureListToNumpy(False) + t = tdb.transferTruthListToNumpy(False) + _ = tdb.transferWeightListToNumpy(False) + all_split = [SimpleArray(f[0],f[1]), SimpleArray(f[2],f[3]), + SimpleArray(t[0],t[1]), SimpleArray(t[2],t[3])] + + self.assertEqual(all_splitorig,all_split) + + def test_KerasDTypes(self): + print('TestTrainData: split') + a = self.createSimpleArray('int32') + b = self.createSimpleArray('float32',600) + c = self.createSimpleArray('int32') + d = self.createSimpleArray('float32',400) + + td = TrainData() + td._store([a,b], [c,d], []) + + #data, rs, data, rs + self.assertEqual(td.getNumpyFeatureDTypes(), ['int32','int64','float32','int64']) + \ No newline at end of file diff --git a/testing/unit/TestTrainDataGenerator.py b/testing/unit/TestTrainDataGenerator.py new file mode 100644 index 0000000..06777c6 --- /dev/null +++ b/testing/unit/TestTrainDataGenerator.py @@ -0,0 +1,187 @@ +import numpy as np +from DeepJetCore import TrainData, DataCollection +import shutil +import unittest + +class RaggedTester(object): + def __init__(self, max_per_rs=543): + self.max_per_rs=max_per_rs + + def createEvent(self,length: int,dtype='float32'): + a = np.arange(length,dtype=dtype) + a = np.expand_dims(a,axis=1) + return a + + def checkEvent(self,a,dtype='float32'): + checkarr = self.createEvent(len(a),dtype) + return np.all(checkarr==a) and checkarr.dtype == a.dtype + + + + def createData(self,ntotal): + segments = np.random.randint(2,self.max_per_rs,size=ntotal) #8347 + row_splits = [0] + data=[] + for s in segments: + data.append(self.createEvent(s)) + row_splits.append(s+row_splits[-1]) + + return np.concatenate(data), np.array(row_splits,dtype='int64') + + def 
checkData(self,data,rs,dtype='float32'): + for i in range(len(rs)-1): + ea=data[rs[i]:rs[i+1]] + if not self.checkEvent(ea,dtype): + return False + return True + +import tempfile +class TempFileList(object): + def __init__(self, length=1): + self._files=[tempfile.NamedTemporaryFile(delete=True) for _ in range(length)] + self.filenames = [f.name for f in self._files] + + def __del__(self): + for f in self._files: + f.close() + + + +class TempDir(object): + def __init__(self, delete=True): + self.path = tempfile.mkdtemp() + self.delete=delete + print(self.path) + + def __del__(self): + if not self.delete: + return + shutil.rmtree(self.path) + +class TempDirName(object): + def __init__(self): + td=TempDir(delete=True) + self.path=td.path + del td + + + +## self-consistency check + +raggedtester=RaggedTester() + +class TrainData_test(TrainData): + + def convertFromSourceFile(self, filename, weighterobjects, istraining): + global raggedtester + import hashlib + from DeepJetCore import SimpleArray + + seed = int(hashlib.sha1(filename.encode('utf-8')).hexdigest(), 16) % (10 ** 8) + np.random.seed(seed) + nsamples = np.random.randint(12,101,size=1) + data,rs = raggedtester.createData(nsamples) + + farr = SimpleArray(data, rs,name="features_ragged") + true_arr = SimpleArray(data, rs,name="truth_ragged") + farrint = SimpleArray(np.array(data,dtype='int32'), rs, name="features_int_ragged") + #farr.createFromNumpy() + + return [farr,farrint],[true_arr],[] + + + +class TestTrainDataGenerator(unittest.TestCase): + + def test_fullGenerator(self): + print("TestTrainDataGenerator full generator") + + passed = True + + n_files=11 + n_per_batch=2078 + files = TempFileList(n_files) + dcoutdir = TempDirName() + + n_per_batch=n_per_batch + + dc = DataCollection() + dc.dataclass = TrainData_test + dc.sourceList = [f for f in files.filenames] + dc.createDataFromRoot(TrainData_test, outputDir=dcoutdir.path) + + gen = dc.invokeGenerator() + gen.setBatchSize(n_per_batch) + + for epoch in range(10): + gen.prepareNextEpoch() + for b in range(gen.getNBatches()): + d,t = next(gen.feedNumpyData()) + data,rs, dint, _ = d[0],d[1],d[2],d[3] + truth = t[0] + rs = rs[:,0]#remove last 1 dim + + datagood = raggedtester.checkData(data, rs) + datagood = datagood and raggedtester.checkData(dint, rs, 'int32') + datagood = datagood and raggedtester.checkData(truth, rs) + + if not datagood: + print('epoch',epoch, 'batch',b,'broken') + passed=False + break + if rs[-1] > n_per_batch: + print('maximum batch size exceeded for batch ',b, 'epoch', epoch) + passed = False + break + + gen.shuffleFileList() + + shutil.rmtree(dcoutdir.path) + self.assertTrue(passed) + + def test_fullGeneratorDict(self): + print("TestTrainDataGenerator full generator with dictionary") + + passed = True + + n_files=11 + n_per_batch=2078 + files = TempFileList(n_files) + dcoutdir = TempDirName() + + n_per_batch=n_per_batch + + dc = DataCollection() + dc.dataclass = TrainData_test + dc.sourceList = [f for f in files.filenames] + dc.createDataFromRoot(TrainData_test, outputDir=dcoutdir.path) + + gen = dc.invokeGenerator() + gen.setBatchSize(n_per_batch) + gen.dict_output = True + + for epoch in range(10): + gen.prepareNextEpoch() + for b in range(gen.getNBatches()): + d,t = next(gen.feedNumpyData()) + data,rs, dint = d['features_ragged'],d['features_ragged_rowsplits'],d['features_int_ragged'] + truth = t['truth_ragged'] + rs = rs[:,0]#remove last 1 dim + + datagood = raggedtester.checkData(data, rs) + datagood = datagood and raggedtester.checkData(dint, rs, 
'int32') + datagood = datagood and raggedtester.checkData(truth, rs) + + if not datagood: + print('epoch',epoch, 'batch',b,'broken') + passed=False + break + if rs[-1] > n_per_batch: + print('maximum batch size exceeded for batch ',b, 'epoch', epoch) + passed = False + break + + gen.shuffleFileList() + + shutil.rmtree(dcoutdir.path) + self.assertTrue(passed) diff --git a/testing/unit/np_arr.npy b/testing/unit/np_arr.npy new file mode 100644 index 0000000..b962fab Binary files /dev/null and b/testing/unit/np_arr.npy differ diff --git a/testing/unit/np_rs.npy b/testing/unit/np_rs.npy new file mode 100644 index 0000000..02a4347 Binary files /dev/null and b/testing/unit/np_rs.npy differ diff --git a/testing/unit/simpleArray_previous.djcsa b/testing/unit/simpleArray_previous.djcsa new file mode 100644 index 0000000..12653bf Binary files /dev/null and b/testing/unit/simpleArray_previous.djcsa differ diff --git a/testing/unit/test.py b/testing/unit/test.py new file mode 100644 index 0000000..ac3f29d --- /dev/null +++ b/testing/unit/test.py @@ -0,0 +1,8 @@ +from TestSimpleArray import TestSimpleArray +from TestTrainData import TestTrainData +from TestCompatibility import TestCompatibility +from TestTrainDataGenerator import TestTrainDataGenerator +from TestCFunctions import TestCFunctions +import unittest + +unittest.main() \ No newline at end of file diff --git a/testing/unit/trainData_previous.djctd b/testing/unit/trainData_previous.djctd new file mode 100644 index 0000000..38d80e7 Binary files /dev/null and b/testing/unit/trainData_previous.djctd differ diff --git a/training/DeepJet_callbacks.py b/training/DeepJet_callbacks.py index eff3070..679da91 100644 --- a/training/DeepJet_callbacks.py +++ b/training/DeepJet_callbacks.py @@ -3,17 +3,349 @@ @author: jkiesele ''' -from __future__ import print_function + +import matplotlib +matplotlib.use('Agg') + from .ReduceLROnPlateau import ReduceLROnPlateau from ..evaluation import plotLoss +from ..evaluation import plotBatchLoss -from keras.callbacks import Callback, EarlyStopping,History,ModelCheckpoint #, ReduceLROnPlateau # , TensorBoard +import matplotlib.pyplot as plt +import numpy as np +from multiprocessing import Process + +from tensorflow.keras.callbacks import Callback, EarlyStopping,History,ModelCheckpoint #, ReduceLROnPlateau # , TensorBoard # loss per epoch from time import time from pdb import set_trace import json from keras import backend as K +import matplotlib +import os +matplotlib.use('Agg') + +#helper + +def publish(file_to_publish, publish_to_path): + try: + cpstring = 'cp -f ' + if "@" in publish_to_path: + cpstring = 'scp -o ConnectTimeout=20 ' + spl = publish_to_path.split(':') + path = spl[1] + user = spl[0].split('@')[0] + server = spl[0].split('@')[1] + os.system('ssh -o ConnectTimeout=20 '+user+'@'+server+' "mkdir -p '+path+'"') + basefilename = os.path.basename(file_to_publish) + os.system(cpstring + file_to_publish + ' ' + publish_to_path +'/'+basefilename+ ' 2>&1 > /dev/null') + except Exception as e: + print('exception in publish', e, 'when trying to publish to',publish_to_path ,'. Training will ignore this exception and move on.') + +def hampel(vals_orig, k=7, t0=3): + ''' + Hampel, Frank R. β€œThe Influence Curve and Its Role in Robust Estimation.” + Journal of the American Statistical Association 69, no. 346 (1974): 383–93. + https://doi.org/10.2307/2285666. 
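+
+    Values farther than t0 scaled median absolute deviations (scale factor L = 1.4826)
+    from the rolling median are flagged as outliers and replaced by that median.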
+ + vals: pandas series of values from which to remove outliers + k: size of window (including the sample; 7 is equal to 3 on either side of value) + + Implementation adapted from + https://newbedev.com/filtering-outliers-how-to-make-median-based-hampel-function-faster + ''' + #Make copy so original not edited + vals=vals_orig.copy() + #Hampel Filter + L= 1.4826 + rolling_median=vals.rolling(k).median() + signed_difference = vals-rolling_median + difference=np.abs(signed_difference) + median_abs_deviation=difference.rolling(k).median() + threshold= t0 *L * median_abs_deviation + outlier_idx=difference>threshold + vals[outlier_idx]=vals-signed_difference + return vals + +class simpleMetricsCallback(Callback): + + def __init__(self, + output_file, + select_metrics=None, + call_on_epoch=False, + record_frequency= 10, + plot_frequency = 20, + smoothen=None, + smooth_more_at=None, + suppress_outliers=True, + publish=None, + dtype='float16'): + ''' + Requires plotly. If metrics cannot be found, the callback will give up on the 100th try. + + select_metrics: select which metrics to plot. + - a list of explicit names, e.g. ["accuracy","recall"] + - an individual name + - a (list of) names with wildcards, e.g. "accuracy_*" + + call_on_epoch: calls the data recording and plotting at the end of each epoch + + record_frequency: (only if call_on_epoch=False) + records data every N batches (to keep memory consumption low) + plot_frequency: (only if call_on_epoch=False) + make the plot every N RECORDS + (so a plot will be made every record_frequency*plot_frequency batches) + Also triggers saving a pandas dataframe with the raw data + + smoothen: smoothen the plot. Window size for the Savitzky-Golay filter. For batch-wise recording values around + 50 are usually a good choice. + The raw data saved as pandas dataframe will not be affected. + + smooth_more_at: Start to smoothen more when more than points are collected + such that plot remains readable. + The raw data saved as pandas dataframe will not be affected. + + suppress_outliers: suppresses outliers before smoothing using a hampel filter. + The raw data saved as pandas dataframe will not be affected. + + publish: uses scp or cp to copy the output file to another location (e.g. from a cluster to a website server). + if the path contains and "@", it will use scp. This only works with configured key pairs or tokens. + The path does not contain the output file name + + dtype: data type for data to be stored to keep memory consuption within reason (be careful) + + + Savitzy-Golay: + (base): Whittaker, E.T; Robinson, G (1924). The Calculus Of Observations + Guest, P.G. (2012) [1961]. "Ch. 7: Estimation of Polynomial Coefficients" + (coefficients): Savitzky, A.; Golay, M.J.E. (1964). 
"Smoothing and Differentiation of Data by Simplified Least Squares Procedures" + + ''' + + if select_metrics is not None: + assert isinstance(select_metrics,str) or isinstance(select_metrics,list) + if isinstance(select_metrics,str): + select_metrics=[select_metrics] + + assert dtype == 'float16' or dtype == 'float32' or dtype == 'int32' + self.dtype = dtype + + if smoothen is None: + if call_on_epoch: + smoothen = 5 + else: + smoothen = 51 + smoothen=int(smoothen) + if smoothen>0 and not smoothen%2: + smoothen+=1 + + if smooth_more_at is None: + smooth_more_at = 500 + else: + assert isinstance(smooth_more_at,int) and smooth_more_at >= 0 + + self.smoothen = smoothen + self.smooth_more_at = smooth_more_at + self.suppress_outliers = suppress_outliers + self.output_file=output_file + self.select_metrics=select_metrics + self.record_frequency = record_frequency + self.plot_frequency = plot_frequency + self.record_counter=-1 + self.plot_counter=0 + self._thread=None + self.call_on_epoch = call_on_epoch + self.publish = publish + self.data={} + self.len=0 + self.give_up_counter=0 + + #check if pre-recorded data exists, in case a training is resumed + recordsfile = self.output_file+'.df.pkl' + if os.path.isfile(recordsfile): + import pandas as pd + df = pd.read_pickle(recordsfile) + self.data=df.to_dict('list') + for k in self.data.keys(): + self.len = len(self.data[k]) + break + + + def _record_data(self,logs): + #log is dict with simple scalars + if self.give_up_counter == 99:#just give up + print('Giving up trying to find metrics',self.select_metrics) + self.give_up_counter+=1 + return + if self.give_up_counter > 99: + return + + if len(self.data) == 0: #build the dict at first call + + for k in logs.keys(): + if self.select_metrics is None: + self.data[k]=np.array([logs[k]],dtype=self.dtype)#enough, keep memory consumption ok + else: + if k in self.select_metrics: + self.data[k]=np.array([logs[k]],dtype=self.dtype) + else: + import fnmatch + for sm in self.select_metrics: + if fnmatch.fnmatch(k,sm): + self.data[k]=np.array([logs[k]],dtype=self.dtype) + if len(self.data) == 0: + print('could not find metrics',self.select_metrics,'in',logs.keys()) + self.give_up_counter += 1 + else: + for k in self.data.keys(): #already determined + self.data[k] = np.concatenate([self.data[k],np.array([logs[k]],dtype=self.dtype)],axis=0) + self.len = len(self.data[k]) + + def _make_plot_worker(self): + if self.len < 2: + return + import pandas as pd + pd.options.plotting.backend = "plotly" + #save original data + + df = pd.DataFrame().from_dict(self.data) + df.to_pickle(self.output_file+'.df.pkl')#save snapshot + + print_smoothed = self.smoothen > 3 and self.len > self.smoothen+1 + allkeys = self.data.keys() + + #determine smoothing parameters + window = self.smoothen + if self.smooth_more_at and self.len > self.smooth_more_at:#smoothen more for large data sets + window = self.smoothen+self.len//self.smooth_more_at + if not window%2: + window +=1 + + for c in allkeys: + df[c+'_raw']=df[c].copy() + if print_smoothed: + from scipy.signal import savgol_filter + if self.suppress_outliers: + df[c] = hampel(df[c]) + df[c] = savgol_filter(df[c], window_length = window, polyorder = 3) + + + fig = df.plot(template = 'plotly_dark',y=[k for k in allkeys]) + + if print_smoothed: + fig.update_layout( + updatemenus=[ + dict(type="buttons",direction="right",x=0.5,y=1.2, + showactive=False, + buttons=list( + [ + dict( + label="Smooth", + method="update", + args=[{"y": [df[k] for k in allkeys]}], + ), + dict( + label="Raw", + 
method="update", + args=[{"y": [df[k+'_raw'] for k in allkeys]}], + ), + ] + ), + ) + ] + ) + + fig.write_html(self.output_file) + + if self.publish is not None: + publish(self.output_file, self.publish) + + def _make_plot(self): + #to be multi-processed + if self._thread is not None: + self._thread.join(120)#wait two minutes + + self._thread = Process(target=self._make_plot_worker) + self._thread.start() + + + def on_batch_end(self,batch,logs={}): + + if self.record_counter<0 and not self.call_on_epoch: #always record first + self.record_counter=0 + self._record_data(logs) + return + + if self.call_on_epoch: + return + if self.record_counter < self.record_frequency-1: + self.record_counter+=1 + return + self.record_counter=0 + #record data + self._record_data(logs) + + if self.plot_counter < self.plot_frequency-1: + self.plot_counter+=1 + return + self.plot_counter=0 + #make plot + self._make_plot() + + def on_epoch_end(self,epoch,logs={}): + if not self.call_on_epoch: + return + self._record_data(logs) + self._make_plot() + + + +class PrintSummary(Callback): + def __init__(self): + self.aimed=True + self.model=None + super(PrintSummary, self).__init__() + + def set_model(self,model): + self.model = model + + def on_batch_end(self, batch, logs={}): + if not self.aimed: + return + + self.aimed=False + print(self.model.summary()) + +class plot_loss_or_metric(Callback): + def __init__(self,outputDir,metrics): + self.metrics=metrics + self.outputDir=outputDir + + def on_epoch_end(self,epoch, logs={}): + lossfile=os.path.join( self.outputDir, 'full_info.log') + allinfo_history=None + with open(lossfile, 'r') as infile: + allinfo_history=json.load(infile) + + nepochs=len(allinfo_history) + allnumbers=[[] for i in range(len(self.metrics))] + epochs=[] + for i in range(nepochs): + epochs.append(i) + for j in range(len(self.metrics)): + allnumbers[j].append(allinfo_history[i][self.metrics[j]]) + + import matplotlib.pyplot as plt + for j in range(len(self.metrics)): + f = plt.figure() + plt.plot(epochs,allnumbers[j],'r',label=self.metrics[j]) + plt.ylabel(self.metrics[j]) + plt.xlabel('epoch') + #plt.legend() + f.savefig(self.outputDir+'/'+self.metrics[j]+'.pdf') + plt.close() + class newline_callbacks_begin(Callback): @@ -24,51 +356,177 @@ def __init__(self,outputDir,plotLoss=False): self.full_logs=[] self.plotLoss=plotLoss - def on_epoch_end(self,epoch, epoch_logs={}): + def on_epoch_end(self,epoch, logs={}): + if len(logs)<1: + return import os lossfile=os.path.join( self.outputDir, 'losses.log') print('\n***callbacks***\nsaving losses to '+lossfile) - self.loss.append(epoch_logs.get('loss')) - self.val_loss.append(epoch_logs.get('val_loss')) + + # problem with new keras version calling callbacks even after exceptions + if logs.get('loss') is None: + return + if logs.get('val_loss') is None: + return + + self.loss.append(logs.get('loss')) + self.val_loss.append(logs.get('val_loss')) f = open(lossfile, 'a') - f.write(str(epoch_logs.get('loss'))) + f.write(str(logs.get('loss'))) f.write(" ") - f.write(str(epoch_logs.get('val_loss'))) + f.write(str(logs.get('val_loss'))) f.write("\n") f.close() learnfile=os.path.join( self.outputDir, 'learn.log') - with open(learnfile, 'a') as f: - f.write(str(float(K.get_value(self.model.optimizer.lr)))+'\n') - - normed = {} - for vv in epoch_logs: - normed[vv] = float(epoch_logs[vv]) - self.full_logs.append(normed) - lossfile=os.path.join( self.outputDir, 'full_info.log') - with open(lossfile, 'w') as out: - out.write(json.dumps(self.full_logs)) + try: + with 
open(learnfile, 'a') as f: + f.write(str(float(K.get_value(self.model.optimizer.lr)))+'\n') + lossfile=os.path.join( self.outputDir, 'full_info.log') + if os.path.isfile(lossfile): + with open(lossfile, 'r') as infile: + self.full_logs=json.load(infile) + + normed = {} + for vv in logs: + normed[vv] = float(logs[vv]) + self.full_logs.append(normed) + + with open(lossfile, 'w') as out: + out.write(json.dumps(self.full_logs)) + except: + pass + if self.plotLoss: - plotLoss(self.outputDir+'/losses.log',self.outputDir+'/losses.pdf',[]) + try: + plotLoss(self.outputDir+'/losses.log',self.outputDir+'/losses.pdf',[]) + except: + pass + +class batch_callback_begin(Callback): + + def __init__(self,outputDir,plotLoss=False,plot_frequency=-1,batch_frequency=1): + self.outputDir=outputDir + self.loss=[] + self.val_loss=[] + self.full_logs=[] + self.plotLoss=plotLoss + self.plot_frequency=plot_frequency + self.plotcounter=0 + self.batch_frequency=batch_frequency + self.batchcounter=0 + + + + + def read(self): + + import os + if not os.path.isfile(self.outputDir+'/batch_losses.log') : + return + blossfile=os.path.join( self.outputDir, 'batch_losses.log') + f = open(blossfile, 'r') + self.loss = [] + for line in f: + if len(line)<1: continue + tl=float(line.split(' ')[0]) + self.loss.append(tl) + + f.close() + + def on_batch_end(self,batch,logs={}): + if len(logs)<1: + return + if logs.get('loss') is None: + return + self.batchcounter += 1 + + if not self.batch_frequency == self.batchcounter: + return + self.batchcounter=0 + + self.loss.append(logs.get('loss')) + + if self.plot_frequency<0: + return + self.plotcounter+=1 + if self.plot_frequency == self.plotcounter: + self.plot() + self.plotcounter = 0 + + + def _plot(self): + if len(self.loss) < 2: + return + batches = [self.batch_frequency*i for i in range(len(self.loss))] + plt.close() + plt.plot(batches,self.loss,'r-',label='loss') + + def smooth(y, box_pts): + box = np.ones(box_pts)/box_pts + y_smooth = np.convolve(y, box, mode='same') + return y_smooth + + if len(batches) > 50: + smoothed = smooth(self.loss,50) + #remove where the simple smoothing doesn't give reasonable results + plt.plot(batches[25:-25],smoothed[25:-25],'g-',label='smoothed') + plt.legend() + + plt.xlabel("# batches") + plt.ylabel("training loss") + plt.yscale("log") + plt.savefig(self.outputDir+'/batch_losses.pdf') + plt.close() + + + def plot(self): + self._plot() + + def save(self): + + import os + blossfile=os.path.join( self.outputDir, 'batch_losses.log') + f = open(blossfile, 'w') + for i in range(len(self.loss)): + f.write(str(self.loss[i])) + f.write("\n") + self.loss=[] + self.val_loss=[] + f.close() + + + def on_epoch_end(self,epoch,logs={}): + self.plot() + self.save() + + def on_epoch_begin(self, epoch, logs=None): + self.read() + if len(self.loss): + self.plot() class newline_callbacks_end(Callback): - def on_epoch_end(self,epoch, epoch_logs={}): + def on_epoch_end(self,epoch, logs={}): print('\n***callbacks end***\n') class Losstimer(Callback): - def __init__(self, every = 5): + def __init__(self, every = 50): self.points = [] self.every = every + self.counter=0 def on_train_begin(self, logs): self.start = time() - def on_batch_end(self, batch, logs): - if (batch % self.every) != 0: return + def on_batch_end(self, batch, logs={}): + if (self.counter != self.every): + self.counter+=1 + return + self.counter = 0 elapsed = time() - self.start cop = {} - for i, j in logs.iteritems(): + for i, j in logs.items(): cop[i] = float(j) cop['elapsed'] = elapsed 
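        # keep a plain-float snapshot of the logged metrics together with the elapsed wall time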
self.points.append(cop) @@ -80,21 +538,43 @@ def __init__(self,cutofftime_hours=48): self.cutofftime_hours=cutofftime_hours def on_epoch_begin(self, epoch, logs=None): - from tokenTools import checkTokens + from .tokenTools import checkTokens checkTokens(self.cutofftime_hours) class saveCheckPointDeepJet(Callback): ''' - this seems obvious, however for some reason the keras model checkpoint fails - to save the optimizer state, needed for resuming a training. Therefore this explicit - implementation. + Slight extension of the normal checkpoint to multiple checkpoints per epoch ''' - def __init__(self,outputDir,model): - self.outputDir=outputDir + def __init__(self,outputFile,model,check_n_batches=-1,nrotate=3): + self.outputFile=outputFile self.djmodel=model - def on_epoch_end(self,epoch, epoch_logs={}): - self.djmodel.save(self.outputDir+"/KERAS_check_model_last.h5") + self.counter=0 + self.rotate_idx=0 + self.rotations=[str(i) for i in range(nrotate)] + self.check_n_batches=check_n_batches + + def on_batch_end(self,batch,logs={}): + if self.check_n_batches < 1: + return + if self.counter < self.check_n_batches: + self.counter+=1 + return + self.djmodel.save(self.outputFile[:-3]+'_rot_'+self.rotations[self.rotate_idx]+'.h5') + self.djmodel.save(self.outputFile) + self.counter=0 + self.rotate_idx += 1 + if self.rotate_idx >= len(self.rotations): + self.rotate_idx=0 + + def on_epoch_end(self,epoch, logs={}): + if len(logs)<1: + return + if logs.get('loss') is None: + return + if logs.get('val_loss') is None: + return + self.djmodel.save(self.outputFile) class DeepJet_callbacks(object): @@ -109,15 +589,23 @@ def __init__(self, outputDir='', minTokenLifetime=5, checkperiod=10, - plotLossEachEpoch=True): + backup_after_batches=-1, + checkperiodoffset=0, + plotLossEachEpoch=True, + additional_plots=None, + batch_loss = False, + print_summary_after_first_batch=False): - self.nl_begin=newline_callbacks_begin(outputDir,plotLossEachEpoch) self.nl_end=newline_callbacks_end() self.callbacks=[self.nl_begin] + if batch_loss: + self.batch_callback=batch_callback_begin(outputDir,plotLossEachEpoch) + self.callbacks.append(self.batch_callback) + if minTokenLifetime>0: self.tokencheck=checkTokens_callback(minTokenLifetime) self.callbacks.append(self.tokencheck) @@ -136,10 +624,12 @@ def __init__(self, self.callbacks.append(self.modelbestcheck) if checkperiod>0: - self.modelcheckperiod=ModelCheckpoint(outputDir+"/KERAS_check_model_epoch{epoch:02d}.h5", verbose=1,period=checkperiod, save_weights_only=False) + self.modelcheckperiod=ModelCheckpoint(outputDir+"/KERAS_check_model_block_"+str(checkperiodoffset)+"_epoch_{epoch:02d}.h5", + verbose=1,period=checkperiod, save_weights_only=False) self.callbacks.append(self.modelcheckperiod) - self.modelcheck=saveCheckPointDeepJet(outputDir,model) + self.modelcheck=saveCheckPointDeepJet(outputDir+"/KERAS_check_model_last.h5",model,backup_after_batches) + self.callbacks.append(self.modelcheck) if stop_patience>0: self.stopping = EarlyStopping(monitor='val_loss', @@ -147,9 +637,98 @@ def __init__(self, verbose=1, mode='min') self.callbacks.append(self.stopping) - + if additional_plots: + self.additionalplots = plot_loss_or_metric(outputDir,additional_plots) + self.callbacks.append(self.additionalplots) + self.history=History() - self.timer = Losstimer() - - self.callbacks.extend([ self.nl_end, self.history,self.timer]) + self.callbacks.extend([ self.nl_end, self.history]) + if print_summary_after_first_batch: + self.callbacks += [PrintSummary()] + + + +from 
DeepJetCore.TrainData import TrainData +from DeepJetCore.dataPipeline import TrainDataGenerator + +class PredictCallback(Callback): + + def __init__(self, + samplefile, + function_to_apply=None, #needs to be function(counter,[model_input], [predict_output], [truth]) + after_n_batches=50, + batchsize=10, + on_epoch_end=False, + use_event=0, + decay_function=None, + offset=0 + ): + super(PredictCallback, self).__init__() + self.samplefile=samplefile + self.function_to_apply=function_to_apply + self.counter=0 + self.call_counter=offset + self.decay_function=decay_function + + self.after_n_batches=after_n_batches + self.run_on_epoch_end=on_epoch_end + + if self.run_on_epoch_end and self.after_n_batches>=0: + print('PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end') + self.after_n_batches=0 + + td=TrainData() + td.readFromFile(samplefile) + if use_event>=0: + td.skim(use_event) + + self.batchsize = 1 + self.td = td + self.gen = TrainDataGenerator() + self.gen.setBatchSize(batchsize) + self.gen.setSkipTooLargeBatches(False) + + + def reset(self): + self.call_counter=0 + + def predict_and_call(self,counter): + + self.gen.setBuffer(self.td) + + predicted = self.model.predict_generator(self.gen.feedNumpyData(), + steps=self.gen.getNBatches(), + max_queue_size=1, + use_multiprocessing=False, + verbose=2) + + if not (isinstance(predicted, list) or isinstance(predicted, dict)): + predicted=[predicted] + + self.function_to_apply(self.call_counter,self.td.copyFeatureListToNumpy(False), + predicted,self.td.copyTruthListToNumpy(False)) + self.call_counter+=1 + + def on_epoch_end(self, epoch, logs=None): + self.counter=0 + if not self.run_on_epoch_end: return + self.predict_and_call(epoch) + + def on_batch_end(self, batch, logs=None): + if self.after_n_batches<=0: return + self.counter+=1 + if self.counter>self.after_n_batches: + self.counter=0 + self.predict_and_call(batch) + if self.decay_function is not None: + self.after_n_batches=self.decay_function(self.call_counter) + + + + + + + + + \ No newline at end of file diff --git a/training/batchTools.py b/training/batchTools.py new file mode 100644 index 0000000..7ae5539 --- /dev/null +++ b/training/batchTools.py @@ -0,0 +1,87 @@ + +from __future__ import print_function + +#can pipe config from stdin to condor_submit! 
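+
+# rough usage sketch (assuming a configured training_base instance called `train`):
+#   from DeepJetCore.training.batchTools import submit_batch
+#   submit_batch(train, walltime='1d12h')  # writes batchscript.sh and condor.sub, then runs condor_submit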
+#executable, arguments + +#add feedback from traindata arguments +#add not requesting new directory after creating one in the batch submission + +from DeepJetCore.training.training_base import training_base +import os, sys, stat + +def submit_batch(trainingbase, walltime=None): + + subpackage = os.environ['DEEPJETCORE_SUBPACKAGE'] + + commandline = " ".join(trainingbase.argstring) + + scriptpath = trainingbase.outputDir+'batchscript.sh' + + condorpath = trainingbase.outputDir+'condor.sub' + + #create the batch script + batch_scipt=''' +#!/bin/zsh +echo "running DeepJetCore job::setting up environment" +cd {subpackage} +pwd +source {subpackage}/env.sh +cd {jobdir} +echo "running DeepJetCore job::training" +python {fullcommand} +echo "job done" + '''.format(subpackage=subpackage, + jobdir=trainingbase.outputDir, + trainingscript=trainingbase.copied_script, + fullcommand=commandline+ ' --isbatchrun') + + with open(scriptpath,'w') as scriptfile: + scriptfile.write(batch_scipt) + + os.chmod(scriptpath, stat.S_IRWXU | + stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH) + + if walltime is not None: + wt_days=0 + wt_hours=0 + rest=walltime + if 'd' in walltime: + wt_days = int(rest.split('d')[0]) + rest = rest.split('d')[1:] + if 'h' in walltime: + wt_hours = int(''.join(rest).split('h')[0]) + print('submitting for ', wt_days, 'days', wt_hours, 'hours') + walltime = wt_days*24*3600 + wt_hours*3600 + + else: + walltime=1*24*3600 # 1 day standard + + ncpus=3 + ncpus+=trainingbase.ngpus + + condor_file=''' +executable = /bin/bash +arguments = {scriptpath} +output = {outdir}batch.out +error = {outdir}batch.err +log = {outdir}batch.log +getenv = True ++MaxRuntime = {walltime} +request_GPUs = {ngpus} +request_cpus = {ncpus} +queue 1 + '''.format(scriptpath=scriptpath, + outdir=trainingbase.outputDir, + walltime=str(walltime), + ngpus=trainingbase.ngpus, + ncpus=ncpus) + + with open(condorpath,'w') as condorfile: + condorfile.write(condor_file) + + os.system('condor_submit '+condorpath) + print('job submitted') + + + \ No newline at end of file diff --git a/training/gpuTools.py b/training/gpuTools.py new file mode 100644 index 0000000..62300a2 --- /dev/null +++ b/training/gpuTools.py @@ -0,0 +1,16 @@ +import os + +def DJCSetGPUs(gpus_string: str =""): + if not len(gpus_string): + import imp + try: + imp.find_module('setGPU') + import setGPU + except : + print('No GPUs found, running on CPU') + #print('DeepJetCore.DJCSetGPU: no GPU specified and automatic setting impossible') + #raise e + else: + os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' + os.environ['CUDA_VISIBLE_DEVICES'] = gpus_string + print('running on GPU(s) '+gpus_string) diff --git a/training/tokenTools.py b/training/tokenTools.py index ceff503..87dfc39 100644 --- a/training/tokenTools.py +++ b/training/tokenTools.py @@ -1,10 +1,9 @@ -renewtokens=True +#not used for now + def renew_token_process(): - - if not renewtokens: - return + return import subprocess import time @@ -18,12 +17,14 @@ def renew_token_process(): time.sleep(3600) def checkTokens(cutofftime_hours=48): - if not renewtokens: - return True + return + import subprocess + import os klist="" try: + os.environ['LC_ALL']="en_US.UTF-8" klist=str(subprocess.check_output(['klist'],stderr=subprocess.STDOUT)) except subprocess.CalledProcessError as inst: print('klist failed - no token?')#just ignore @@ -48,10 +49,14 @@ def checkTokens(cutofftime_hours=48): import datetime thistime=datetime.datetime.now() - tokentime=datetime.datetime(2000+int(kdate.split('/')[2]) , - 
int(kdate.split('/')[0]), - int(kdate.split('/')[1]), - int(ktime.split(':')[0])) + month,day,year=kdate.split('/') + hour,minu,sec=ktime.split(':') + try: + tokentime=datetime.datetime(2000+int(year),int(month),int(day),int(hour)) + except: + print('Failed to set token time with mm/dd/yy, attempting dd/mm/yy permutation') + tokentime=datetime.datetime(2000+int(year),int(day),int(month),int(hour)) + diff=tokentime-thistime diff=diff.total_seconds() diff --git a/training/training_base.py b/training/training_base.py index db36be8..0fc942a 100644 --- a/training/training_base.py +++ b/training/training_base.py @@ -1,44 +1,25 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - ## to call it from cammand lines import sys import os from argparse import ArgumentParser import shutil -from DeepJetCore.DataCollection import DataCollection -from pdb import set_trace +from DeepJetCore import DataCollection +import tensorflow.keras as keras +import tensorflow as tf +import copy +from .gpuTools import DJCSetGPUs + +from ..customObjects import get_custom_objects +custom_objects_list = get_custom_objects() + + +##helper -import imp -try: - imp.find_module('Losses') - from Losses import * -except ImportError: - print('No Losses module found, ignoring at your own risk') - global_loss_list = {} -try: - imp.find_module('Layers') - from Layers import * -except ImportError: - print('No Layers module found, ignoring at your own risk') - global_layers_list = {} -try: - imp.find_module('Metrics') - from Metrics import * -except ImportError: - print('No metrics module found, ignoring at your own risk') - global_metrics_list = {} -custom_objects_list = {} -custom_objects_list.update(global_loss_list) -custom_objects_list.update(global_layers_list) -custom_objects_list.update(global_metrics_list) class training_base(object): @@ -46,58 +27,66 @@ class training_base(object): def __init__( self, splittrainandtest=0.85, useweights=False, testrun=False, + testrun_fraction=0.1, resumeSilently=False, - renewtokens=True, + renewtokens=False, collection_class=DataCollection, - parser=None + parser=None, + recreate_silently=False ): + scriptname=sys.argv[0] + if parser is None: parser = ArgumentParser('Run the training') parser.add_argument('inputDataCollection') parser.add_argument('outputDir') parser.add_argument('--modelMethod', help='Method to be used to instantiate model in derived training class', metavar='OPT', default=None) - parser.add_argument("--gpu", help="select specific GPU", type=int, metavar="OPT", default=-1) + parser.add_argument("--gpu", help="select specific GPU", metavar="OPT", default="") parser.add_argument("--gpufraction", help="select memory fraction for GPU", type=float, metavar="OPT", default=-1) + parser.add_argument("--submitbatch", help="submits the job to condor" , default=False, action="store_true") + parser.add_argument("--walltime", help="sets the wall time for the batch job, format: 1d5h or 2d or 3h etc" , default='1d') + parser.add_argument("--isbatchrun", help="is batch run", default=False, action="store_true") + parser.add_argument("--valdata", help="set validation dataset (optional)", default="") + parser.add_argument("--takeweights", help="Applies weights from the model given as relative or absolute path. 
Matches by names and skips layers that don't match.", default="") + args = parser.parse_args() self.args = args - import os + self.argstring = sys.argv + #sanity check + if args.isbatchrun: + args.submitbatch=False + resumeSilently=True + + if args.submitbatch: + print('submitting batch job. Model will be compiled for testing before submission (GPU settings being ignored)') import matplotlib #if no X11 use below matplotlib.use('Agg') - if args.gpu<0: - import imp - try: - imp.find_module('setGPU') - import setGPU - except ImportError: - found = False - else: - os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' - os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) - print('running on GPU '+str(args.gpu)) + DJCSetGPUs(args.gpu) if args.gpufraction>0 and args.gpufraction<1: - import sys - import tensorflow as tf gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpufraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) - import keras - from keras import backend as K + from tensorflow.keras import backend as K K.set_session(sess) print('using gpu memory fraction: '+str(args.gpufraction)) + self.ngpus=1 + self.dist_strat_scope=None + if len(args.gpu): + self.ngpus=len([i for i in args.gpu.split(',')]) + print('running on '+str(self.ngpus)+ ' gpus') + if self.ngpus > 1: + self.dist_strat_scope = tf.distribute.MirroredStrategy() - - - import keras - self.keras_inputs=[] self.keras_inputsshapes=[] self.keras_model=None self.keras_model_method=args.modelMethod + self.keras_weight_model_path=args.takeweights self.train_data=None self.val_data=None self.startlearningrate=None @@ -106,7 +95,14 @@ def __init__( self.compiled=False self.checkpointcounter=0 self.renewtokens=renewtokens + if args.isbatchrun: + self.renewtokens=False + self.callbacks=None + self.custom_optimizer=False + self.copied_script="" + self.submitbatch=args.submitbatch + self.GAN_mode=False self.inputData = os.path.abspath(args.inputDataCollection) \ if ',' not in args.inputDataCollection else \ @@ -116,58 +112,102 @@ def __init__( isNewTraining=True if os.path.isdir(self.outputDir): - if not resumeSilently: - var = raw_input('output dir exists. To recover a training, please type "yes"\n') + if not (resumeSilently or recreate_silently): + var = input('output dir exists. 
To recover a training, please type "yes"\n') if not var == 'yes': raise Exception('output directory must not exist yet') - isNewTraining=False + isNewTraining=False + if recreate_silently: + isNewTraining=True else: os.mkdir(self.outputDir) self.outputDir = os.path.abspath(self.outputDir) self.outputDir+='/' + if recreate_silently: + os.system('rm -rf '+ self.outputDir +'*') + #copy configuration to output dir - # move this part to the individual subpackage - #if isNewTraining: - # djsource= os.environ['DEEPJET'] - # shutil.copytree(djsource+'/modules/models', self.outputDir+'models') - # shutil.copyfile(sys.argv[0],self.outputDir+sys.argv[0]) - - - + if not args.isbatchrun: + try: + shutil.copyfile(scriptname,self.outputDir+os.path.basename(scriptname)) + except shutil.SameFileError: + pass + except BaseException as e: + raise e + + self.copied_script = self.outputDir+os.path.basename(scriptname) + else: + self.copied_script = scriptname + self.train_data = collection_class() self.train_data.readFromFile(self.inputData) self.train_data.useweights=useweights - if testrun: - self.train_data.split(0.002) + if len(args.valdata): + print('using validation data from ',args.valdata) + self.val_data = DataCollection(args.valdata) + + else: + if testrun: + if len(self.train_data)>1: + self.train_data.split(testrun_fraction) - self.val_data=self.train_data.split(splittrainandtest) + self.train_data.dataclass_instance=None #can't be pickled + self.val_data=copy.deepcopy(self.train_data) + + else: + self.val_data=self.train_data.split(splittrainandtest) - shapes=self.train_data.getInputShapes() - self.train_data.maxFilesOpen=-1 + shapes = self.train_data.getNumpyFeatureShapes() + inputdtypes = self.train_data.getNumpyFeatureDTypes() + inputnames= self.train_data.getNumpyFeatureArrayNames() + for i in range(len(inputnames)): #in case they are not named + if inputnames[i]=="" or inputnames[i]=="_rowsplits": + inputnames[i]="input_"+str(i)+inputnames[i] + + + print("shapes", shapes) + print("inputdtypes", inputdtypes) + print("inputnames", inputnames) self.keras_inputs=[] self.keras_inputsshapes=[] - - print(shapes) - - for s in shapes: - self.keras_inputs.append(keras.layers.Input(shape=s)) + + for s,dt,n in zip(shapes,inputdtypes,inputnames): + self.keras_inputs.append(keras.layers.Input(shape=s, dtype=dt, name=n)) + self.keras_inputsshapes.append(s) + #bookkeeping + self.train_data.writeToFile(self.outputDir+'trainsamples.djcdc',abspath=True) + self.val_data.writeToFile(self.outputDir+'valsamples.djcdc',abspath=True) + if not isNewTraining: - kfile = self.outputDir+'/KERAS_check_model_last.h5' \ if os.path.isfile(self.outputDir+'/KERAS_check_model_last.h5') else \ self.outputDir+'/KERAS_model.h5' + kfile = self.outputDir+'/KERAS_check_model_last.h5' if not os.path.isfile(kfile): - print('you cannot resume a training that did not train for at least one epoch.\nplease start a new training.') - exit() - self.loadModel(kfile) - self.trainedepoches=sum(1 for line in open(self.outputDir+'losses.log')) - + kfile = self.outputDir+'/KERAS_check_model_last' #savedmodel format + if not os.path.isdir(kfile): + kfile='' + if len(kfile): + print('loading model',kfile) + + if self.dist_strat_scope is not None: + with self.dist_strat_scope.scope(): + self.loadModel(kfile) + else: + self.loadModel(kfile) + self.trainedepoches=0 + if os.path.isfile(self.outputDir+'losses.log'): + for line in open(self.outputDir+'losses.log'): + valloss = line.split(' ')[1][:-1] + if not valloss == "None": + self.trainedepoches+=1 + 
else: + print('incomplete epochs, starting from the beginning but with pretrained model') + else: + print('no model found in existing output dir, starting training from scratch') def __del__(self): if hasattr(self, 'train_data'): @@ -175,66 +215,118 @@ def __del__(self): del self.val_data def modelSet(self): - return not self.keras_model==None + return (not self.keras_model==None) and not len(self.keras_weight_model_path) + + def setDJCKerasModel(self,model,*args,**kwargs): + if len(self.keras_inputs)<1: + raise Exception('setup data first') + self.keras_model=model(*args,**kwargs) + if hasattr(self.keras_model, "_is_djc_keras_model"): + self.keras_model.setInputShape(self.keras_inputs) + self.keras_model.build(None) + if not self.keras_model: + raise Exception('Setting DJCKerasModel not successful') + def setModel(self,model,**modelargs): if len(self.keras_inputs)<1: raise Exception('setup data first') - self.keras_model=model(self.keras_inputs, - self.train_data.getNClassificationTargets(), - self.train_data.getNRegressionTargets(), - **modelargs) + if self.dist_strat_scope is not None: + with self.dist_strat_scope.scope(): + self.keras_model=model(self.keras_inputs,**modelargs) + else: + self.keras_model=model(self.keras_inputs,**modelargs) + if hasattr(self.keras_model, "_is_djc_keras_model"): #compatibility + self.keras_model.setInputShape(self.keras_inputs) + self.keras_model.build(None) + + if len(self.keras_weight_model_path): + from DeepJetCore.modeltools import apply_weights_where_possible, load_model + self.keras_model = apply_weights_where_possible(self.keras_model, + load_model(self.keras_weight_model_path)) + #try: + # self.keras_model=model(self.keras_inputs,**modelargs) + #except BaseException as e: + # print('problem in setting model. 
Reminder: since DJC 2.0, NClassificationTargets and RegressionTargets must not be specified anymore') + # raise e if not self.keras_model: raise Exception('Setting model not successful') - def defineCustomPredictionLabels(self, labels): - self.train_data.defineCustomPredictionLabels(labels) - self.val_data.defineCustomPredictionLabels(labels) def saveCheckPoint(self,addstring=''): self.checkpointcounter=self.checkpointcounter+1 - self.saveModel("KERAS_model_checkpoint_"+str(self.checkpointcounter)+"_"+addstring +".h5") + self.saveModel("KERAS_model_checkpoint_"+str(self.checkpointcounter)+"_"+addstring) - + + def _loadModel(self,filename): + from tensorflow.keras.models import load_model + keras_model=load_model(filename, custom_objects=custom_objects_list) + optimizer=keras_model.optimizer + return keras_model, optimizer + def loadModel(self,filename): - from keras.models import load_model - self.keras_model=load_model(filename, custom_objects=custom_objects_list) - self.optimizer=self.keras_model.optimizer + self.keras_model, self.optimizer = self._loadModel(filename) self.compiled=True + if self.ngpus>1: + self.compiled=False + + def setCustomOptimizer(self,optimizer): + self.optimizer = optimizer + self.custom_optimizer=True def compileModel(self, learningrate, + clipnorm=None, + print_models=False, + metrics=None, + is_eager=False, **compileargs): - if not self.keras_model: + if not self.keras_model and not self.GAN_mode: raise Exception('set model first') - from keras.optimizers import Adam + if self.ngpus>1 and not self.submitbatch: + print('Model being compiled for '+str(self.ngpus)+' gpus') + self.startlearningrate=learningrate - self.optimizer = Adam(lr=self.startlearningrate) - self.keras_model.compile(optimizer=self.optimizer,**compileargs) + + if not self.custom_optimizer: + from tensorflow.keras.optimizers import Adam + if clipnorm: + self.optimizer = Adam(lr=self.startlearningrate,clipnorm=clipnorm) + else: + self.optimizer = Adam(lr=self.startlearningrate) + + + + if self.dist_strat_scope is not None: + with self.dist_strat_scope.scope(): + self.keras_model.compile(optimizer=self.optimizer,metrics=metrics,**compileargs) + else: + self.keras_model.compile(optimizer=self.optimizer,metrics=metrics,**compileargs) + + if is_eager: + #call on one batch to fully build it + self.keras_model(self.train_data.getExampleFeatureBatch()) + + if print_models: + print(self.keras_model.summary()) self.compiled=True def compileModelWithCustomOptimizer(self, customOptimizer, **compileargs): - if not self.keras_model: - raise Exception('set model first') - self.optimizer = customOptimizer - self.keras_model.compile(optimizer=self.optimizer,**compileargs) - self.compiled=True + raise Exception('DEPRECATED: please use setCustomOptimizer before calling compileModel') + def saveModel(self,outfile): - self.keras_model.save(self.outputDir+outfile) - import tensorflow as tf - import keras.backend as K - tfsession=K.get_session() - saver = tf.train.Saver() - tfoutpath=self.outputDir+outfile+'_tfsession/tf' - import os - os.system('rm -rf '+tfoutpath) - os.system('mkdir -p '+tfoutpath) - saver.save(tfsession, tfoutpath) + if not self.GAN_mode: + self.keras_model.save(self.outputDir+outfile) + else: + self.gan.save(self.outputDir+'GAN_'+outfile) + self.generator.save(self.outputDir+'GEN_'+outfile) + self.discriminator.save(self.outputDir+'DIS_'+outfile) + #import h5py @@ -242,64 +334,73 @@ def saveModel(self,outfile): #del f['optimizer_weights'] #f.close() + def _initTraining(self, + nepochs, + 
batchsize, + use_sum_of_squares=False): + + + if self.submitbatch: + from DeepJetCore.training.batchTools import submit_batch + submit_batch(self, self.args.walltime) + exit() #don't delete this! + + self.train_data.setBatchSize(batchsize) + self.val_data.setBatchSize(batchsize) + self.train_data.batch_uses_sum_of_squares=use_sum_of_squares + self.val_data.batch_uses_sum_of_squares=use_sum_of_squares + + + #make sure tokens don't expire + from .tokenTools import checkTokens, renew_token_process + from _thread import start_new_thread + + if self.renewtokens: + print('afs backgrounder has proven to be unreliable, use with care') + checkTokens() + start_new_thread(renew_token_process,()) + + self.train_data.setBatchSize(batchsize) + self.val_data.setBatchSize(batchsize) + + def trainModel(self, nepochs, batchsize, + run_eagerly=False, + batchsize_use_sum_of_squares = False, + fake_truth=False,#extend the truth list with dummies. Useful when adding more prediction outputs than truth inputs stop_patience=-1, lr_factor=0.5, lr_patience=-1, lr_epsilon=0.003, lr_cooldown=6, lr_minimum=0.000001, - maxqsize=20, checkperiod=10, + backup_after_batches=-1, + additional_plots=None, + additional_callbacks=None, + load_in_mem = False, + max_files = -1, + plot_batch_loss = False, **trainargs): - # check a few things, e.g. output dimensions etc. - # need implementation, but probably TF update SWAPNEEL - customtarget=self.train_data.getCustomPredictionLabels() - if customtarget: - pass - # work on self.model.outputs - # check here if the output dimension of the model fits the custom labels - + self.keras_model.run_eagerly=run_eagerly # write only after the output classes have been added - self.train_data.writeToFile(self.outputDir+'trainsamples.dc') - self.val_data.writeToFile(self.outputDir+'valsamples.dc') - - #make sure tokens don't expire - from .tokenTools import checkTokens, renew_token_process - from thread import start_new_thread - - if self.renewtokens: - print('starting afs backgrounder') - checkTokens() - start_new_thread(renew_token_process,()) + self._initTraining(nepochs,batchsize, batchsize_use_sum_of_squares) - self.train_data.setBatchSize(batchsize) - self.val_data.setBatchSize(batchsize) - - averagesamplesperfile=self.train_data.getAvEntriesPerFile() - samplespreread=maxqsize*batchsize - nfilespre=max(int(samplespreread/averagesamplesperfile),2) - nfilespre+=1 - nfilespre=min(nfilespre, len(self.train_data.samples)-1) - #if nfilespre>15:nfilespre=15 - print('best pre read: '+str(nfilespre)+' a: '+str(int(averagesamplesperfile))) - print('total sample size: '+str(self.train_data.nsamples)) - #exit() - - if self.train_data.maxFilesOpen<0: - self.train_data.maxFilesOpen=nfilespre - self.val_data.maxFilesOpen=min(int(nfilespre/2),1) - - #self.keras_model.save(self.outputDir+'KERAS_check_last_model.h5') + try: #won't work for purely eager models + self.keras_model.save(self.outputDir+'KERAS_untrained_model') + except: + pass print('setting up callbacks') from .DeepJet_callbacks import DeepJet_callbacks + minTokenLifetime = 5 + if not self.renewtokens: + minTokenLifetime = -1 - - callbacks=DeepJet_callbacks(self.keras_model, + self.callbacks=DeepJet_callbacks(self.keras_model, stop_patience=stop_patience, lr_factor=lr_factor, lr_patience=lr_patience, @@ -307,33 +408,98 @@ def trainModel(self, lr_cooldown=lr_cooldown, lr_minimum=lr_minimum, outputDir=self.outputDir, - checkperiod=checkperiod) - nepochs=nepochs-self.trainedepoches - print('starting training') - 
self.keras_model.fit_generator(self.train_data.generator() , - steps_per_epoch=self.train_data.getNBatchesPerEpoch(), - epochs=nepochs, - callbacks=callbacks.callbacks, - validation_data=self.val_data.generator(), - validation_steps=self.val_data.getNBatchesPerEpoch(), #)#, - max_q_size=maxqsize,**trainargs) - - - self.saveModel("KERAS_model.h5") + checkperiod=checkperiod, + backup_after_batches=backup_after_batches, + checkperiodoffset=self.trainedepoches, + additional_plots=additional_plots, + batch_loss = plot_batch_loss, + print_summary_after_first_batch=run_eagerly, + minTokenLifetime = minTokenLifetime) + + if additional_callbacks is not None: + if not isinstance(additional_callbacks, list): + additional_callbacks=[additional_callbacks] + self.callbacks.callbacks.extend(additional_callbacks) + - import copy - #reset all file reads etc - tmpdc=copy.deepcopy(self.train_data) - del self.train_data - self.train_data=tmpdc + print('starting training') + if load_in_mem: + print('make features') + X_train = self.train_data.getAllFeatures(nfiles=max_files) + X_test = self.val_data.getAllFeatures(nfiles=max_files) + print('make truth') + Y_train = self.train_data.getAllLabels(nfiles=max_files) + Y_test = self.val_data.getAllLabels(nfiles=max_files) + self.keras_model.fit(X_train, Y_train, batch_size=batchsize, epochs=nepochs, + callbacks=self.callbacks.callbacks, + validation_data=(X_test, Y_test), + max_queue_size=1, + use_multiprocessing=False, + workers=0, + **trainargs) + else: - return self.keras_model, callbacks.history + #prepare generator + + print("setting up generator... can take a while") + use_fake_truth=None + if fake_truth: + if isinstance(self.keras_model.output,dict): + use_fake_truth = [k for k in self.keras_model.output.keys()] + elif isinstance(self.keras_model.output,list): + use_fake_truth = len(self.keras_model.output) + + traingen = self.train_data.invokeGenerator(fake_truth = use_fake_truth) + valgen = self.val_data.invokeGenerator(fake_truth = use_fake_truth) + + + while(self.trainedepoches < nepochs): + + #this can change from epoch to epoch + #calculate steps for this epoch + #feed info below + traingen.prepareNextEpoch() + valgen.prepareNextEpoch() + nbatches_train = traingen.getNBatches() #might have changed due to shuffeling + nbatches_val = valgen.getNBatches() + + print('>>>> epoch', self.trainedepoches,"/",nepochs) + print('training batches: ',nbatches_train) + print('validation batches: ',nbatches_val) + + self.keras_model.fit(traingen.feedNumpyData(), + steps_per_epoch=nbatches_train, + epochs=self.trainedepoches + 1, + initial_epoch=self.trainedepoches, + callbacks=self.callbacks.callbacks, + validation_data=valgen.feedNumpyData(), + validation_steps=nbatches_val, + max_queue_size=1, + use_multiprocessing=False, + workers=0, + **trainargs + ) + self.trainedepoches += 1 + traingen.shuffleFileList() + # + + self.saveModel("KERAS_model.h5") + + return self.keras_model, self.callbacks.history - - + + + def change_learning_rate(self, new_lr): + import tensorflow.keras.backend as K + if self.GAN_mode: + K.set_value(self.discriminator.optimizer.lr, new_lr) + K.set_value(self.gan.optimizer.lr, new_lr) + else: + K.set_value(self.keras_model.optimizer.lr, new_lr) + + - diff --git a/training_pipeline.png b/training_pipeline.png new file mode 100644 index 0000000..d7ea7d2 Binary files /dev/null and b/training_pipeline.png differ
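For reference, a minimal subpackage training script driving the refactored training_base above could look like the sketch below. Everything in it is illustrative: the model function, loss, learning rate, epoch and batch numbers, the sample-file path, and the import path assumed for PredictCallback are placeholders chosen for the example, not values prescribed by this patch.

# hypothetical training script, e.g.
#   python training_sketch.py path/to/dataCollection.djcdc training_out --gpu 0
# (add --submitbatch --walltime 1d5h to hand the job to condor via submit_batch)
import tensorflow.keras as keras
from DeepJetCore.training.training_base import training_base
# assumption: PredictCallback is importable from the callbacks module patched above
from DeepJetCore.training.DeepJet_callbacks import PredictCallback

def my_model(inputs):
    # placeholder architecture; since DJC 2.0 the model function only receives
    # the list of keras Inputs (no NClassificationTargets/NRegressionTargets)
    x = inputs[0] if len(inputs) == 1 else keras.layers.Concatenate()(inputs)
    x = keras.layers.Dense(64, activation='relu')(x)
    x = keras.layers.Dense(2, activation='softmax')(x)
    return keras.models.Model(inputs=inputs, outputs=x)

def print_prediction_summary(counter, features, predictions, truth):
    # user hook with the signature expected by PredictCallback:
    # function(counter, [model_input], [predict_output], [truth])
    print('prediction call', counter, 'first output shape', predictions[0].shape)

train = training_base(testrun=False, resumeSilently=False, renewtokens=False)

if not train.modelSet():   # skipped when resuming from an existing checkpoint
    train.setModel(my_model)
    train.compileModel(learningrate=1e-4, loss='categorical_crossentropy')

model, history = train.trainModel(
    nepochs=10,
    batchsize=200,
    checkperiod=1,   # checkpoint frequency forwarded to DeepJet_callbacks
    additional_callbacks=[PredictCallback(samplefile='path/to/some_sample.djctd',
                                          function_to_apply=print_prediction_summary,
                                          after_n_batches=100)])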