From eba21a5ccabb69e28a8d306b4c21e1ac2b2946f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Karol=20=C5=BBak?= <karol.zak@hotmail.com>
Date: Tue, 20 Aug 2019 15:01:15 +0200
Subject: [PATCH 1/2] update as_batch to work correctly with multiclass labels

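The label-shape check used `lbls is np.ndarray`, which compares the label value with the ndarray class itself and is therefore never true for an actual array, so array-valued labels always got shape (1,). The check now uses `type(lbls) is np.ndarray`, so the label array's own shape is used. This is crucial for multiclass labels.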
---
 mPyPl/keras.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mPyPl/keras.py b/mPyPl/keras.py
index fda7757..e27d119 100644
--- a/mPyPl/keras.py
+++ b/mPyPl/keras.py
@@ -27,7 +27,7 @@ def as_batch(flow, feature_field_name='features', label_field_name='label', batc
                     batch = [np.zeros((batchsize,)+flds[i].shape) for i in feature_field_name]
                 else:
                     batch = np.zeros((batchsize,)+flds[feature_field_name].shape)
-                lbls_shape = lbls.shape if lbls is np.ndarray else (1,)
+                lbls_shape = lbls.shape if type(lbls) is np.ndarray else (1,)
                 labels = np.zeros((batchsize,)+lbls_shape)
             if isinstance(feature_field_name, list):
                 for j,n in enumerate(feature_field_name):

From 49c9155d872a6547ea1f2ba2cb64c545cdb687e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Karol=20=C5=BBak?= <karol.zak@hotmail.com>
Date: Wed, 21 Aug 2019 09:57:49 +0200
Subject: [PATCH 2/2] added output dtype parameters to as_batch function

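The batch and label arrays now take their dtype from the new out_features_dtype / out_labels_dtype parameters. When a parameter is None, the dtype of the corresponding source array is used; labels that are not np.ndarray fall back to NumPy's default dtype.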
---
 mPyPl/keras.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/mPyPl/keras.py b/mPyPl/keras.py
index e27d119..390914a 100644
--- a/mPyPl/keras.py
+++ b/mPyPl/keras.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 @Pipe
-def as_batch(flow, feature_field_name='features', label_field_name='label', batchsize=16):
+def as_batch(flow, feature_field_name='features', label_field_name='label', batchsize=16, out_features_dtype=None, out_labels_dtype=None):
     """
     Split input datastream into a sequence of batches suitable for keras training.
     :param flow: input datastream
@@ -22,18 +22,21 @@ def as_batch(flow, feature_field_name='features', label_field_name='label', batc
             # explicitly compute all fields - this is needed for all fields to be computed only once for on-demand evaluation
             flds = { i : data[i] for i in (feature_field_name if isinstance(feature_field_name, list) else [feature_field_name])}
             lbls = data[label_field_name] # TODO: what happens when label_field_name is a list?
+                
             if batch is None:
                 if isinstance(feature_field_name, list):
-                    batch = [np.zeros((batchsize,)+flds[i].shape) for i in feature_field_name]
+                    batch = [np.zeros((batchsize,)+flds[i].shape, dtype=flds[i].dtype if out_features_dtype is None else out_features_dtype) for i in feature_field_name]
                 else:
-                    batch = np.zeros((batchsize,)+flds[feature_field_name].shape)
+                    batch = np.zeros((batchsize,)+flds[feature_field_name].shape, dtype=flds[feature_field_name].dtype if out_features_dtype is None else out_features_dtype)
+                    
                 lbls_shape = lbls.shape if type(lbls) is np.ndarray else (1,)
-                labels = np.zeros((batchsize,)+lbls_shape)
+                out_labels_dtype = out_labels_dtype if out_labels_dtype is not None else lbls.dtype if type(lbls) is np.ndarray else None
+                labels = np.zeros((batchsize,)+lbls_shape, dtype=out_labels_dtype)
             if isinstance(feature_field_name, list):
                 for j,n in enumerate(feature_field_name):
                     batch[j][i] = flds[n]
             else:
                 batch[i] = flds[feature_field_name]
-            labels[i] = data[label_field_name]
+            labels[i] = lbls
         yield (batch, labels)
         batch = labels = None