From a1e0d5b8d8052a01e813b4b894a5226844f75b55 Mon Sep 17 00:00:00 2001
From: ShawnXuan
Date: Tue, 24 Aug 2021 12:21:33 +0800
Subject: [PATCH 1/7] for resnet regression

---
 Classification/cnns/of_cnn_train_val.py | 3 ++-
 Classification/cnns/train.sh            | 6 ++++--
 Classification/cnns/train_fp16.sh       | 8 ++------
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/Classification/cnns/of_cnn_train_val.py b/Classification/cnns/of_cnn_train_val.py
index 3d5cbbd..3258849 100755
--- a/Classification/cnns/of_cnn_train_val.py
+++ b/Classification/cnns/of_cnn_train_val.py
@@ -21,7 +21,8 @@
 import config as configs
 from util import Snapshot, InitNodes, Metric
 from job_function_util import get_train_config, get_val_config
-import resnet_model
+# import resnet_model
+import resnet_rename as resnet_model
 import resnext_model
 import vgg_model
 import alexnet_model
diff --git a/Classification/cnns/train.sh b/Classification/cnns/train.sh
index 6aa2b80..5a78d34 100755
--- a/Classification/cnns/train.sh
+++ b/Classification/cnns/train.sh
@@ -19,6 +19,7 @@ echo DATA_ROOT=$DATA_ROOT
 LOG_FOLDER=../logs
 mkdir -p $LOG_FOLDER
 LOGFILE=$LOG_FOLDER/resnet_training.log
+export PYTHONUNBUFFERED=1
 
 python3 of_cnn_train_val.py \
     --train_data_dir=$DATA_ROOT/train \
@@ -26,15 +27,16 @@ python3 of_cnn_train_val.py \
     --val_data_dir=$DATA_ROOT/validation \
     --val_data_part_num=256 \
     --num_nodes=1 \
-    --gpu_num_per_node=8 \
+    --gpu_num_per_node=4 \
     --optimizer="sgd" \
     --momentum=0.875 \
     --label_smoothing=0.1 \
     --learning_rate=1.024 \
     --loss_print_every_n_iter=100 \
-    --batch_size_per_device=128 \
+    --batch_size_per_device=32 \
     --val_batch_size_per_device=50 \
     --num_epoch=$NUM_EPOCH \
     --model="resnet50" 2>&1 | tee ${LOGFILE}
+    #--model="resnet50" 2>&1 | tee ${LOGFILE}
 
 echo "Writting log to ${LOGFILE}"
diff --git a/Classification/cnns/train_fp16.sh b/Classification/cnns/train_fp16.sh
index 7ecfa5c..0c59ef0 100755
--- a/Classification/cnns/train_fp16.sh
+++ b/Classification/cnns/train_fp16.sh
@@ -26,18 +26,14 @@ export NCCL_LAUNCH_MODE=PARALLEL
 echo NCCL_LAUNCH_MODE=$NCCL_LAUNCH_MODE
 
 python3 of_cnn_train_val.py \
-    --train_data_dir=$DATA_ROOT/train \
-    --train_data_part_num=256 \
-    --val_data_dir=$DATA_ROOT/validation \
-    --val_data_part_num=256 \
     --num_nodes=1 \
-    --gpu_num_per_node=8 \
+    --gpu_num_per_node=1 \
     --optimizer="sgd" \
     --momentum=0.875 \
     --label_smoothing=0.1 \
     --learning_rate=1.536 \
     --loss_print_every_n_iter=100 \
-    --batch_size_per_device=192 \
+    --batch_size_per_device=64 \
     --val_batch_size_per_device=50 \
     --use_fp16 \
     --channel_last=True \

From 2632d98411449111b560b040c2112b7d26baf123 Mon Sep 17 00:00:00 2001
From: ShawnXuan
Date: Tue, 24 Aug 2021 14:41:31 +0800
Subject: [PATCH 2/7] var renamed resnet

---
 Classification/cnns/resnet_rename.py | 261 +++++++++++++++++++++++++++
 1 file changed, 261 insertions(+)
 create mode 100644 Classification/cnns/resnet_rename.py

diff --git a/Classification/cnns/resnet_rename.py b/Classification/cnns/resnet_rename.py
new file mode 100644
index 0000000..117dad4
--- /dev/null
+++ b/Classification/cnns/resnet_rename.py
@@ -0,0 +1,261 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import oneflow.compatible.single_client as flow + +BLOCK_COUNTS = [3, 4, 6, 3] +BLOCK_FILTERS = [256, 512, 1024, 2048] +BLOCK_FILTERS_INNER = [64, 128, 256, 512] + + +class ResnetBuilder(object): + def __init__( + self, + weight_regularizer, + trainable=True, + training=True, + channel_last=False, + fuse_bn_relu=True, + fuse_bn_add_relu=True, + ): + self.data_format = "NHWC" if channel_last else "NCHW" + self.weight_initializer = flow.variance_scaling_initializer( + 2, "fan_in", "random_normal", data_format=self.data_format + ) + self.weight_regularizer = weight_regularizer + self.trainable = trainable + self.training = training + self.fuse_bn_relu = fuse_bn_relu + self.fuse_bn_add_relu = fuse_bn_add_relu + + def _conv2d( + self, name, input, filters, kernel_size, strides=1, padding="SAME", dilations=1, + ): + # There are different shapes of weight metric between 'NCHW' and 'NHWC' mode + if self.data_format == "NHWC": + shape = (filters, kernel_size, kernel_size, input.shape[3]) + else: + shape = (filters, input.shape[1], kernel_size, kernel_size) + weight = flow.get_variable( + name + ".weight", + shape=shape, + dtype=input.dtype, + initializer=self.weight_initializer, + regularizer=self.weight_regularizer, + model_name="weight", + trainable=self.trainable, + ) + + return flow.nn.conv2d( + input, + weight, + strides, + padding, + None, + self.data_format, + dilations, + name=name, + ) + + def _batch_norm(self, inputs, name=None, last=False): + initializer = flow.zeros_initializer() if last else flow.ones_initializer() + axis = 1 + if self.data_format == "NHWC": + axis = 3 + return flow.layers.batch_normalization( + inputs=inputs, + axis=axis, + momentum=0.9, # 97, + epsilon=1e-5, + center=True, + scale=True, + trainable=self.trainable, + training=self.training, + gamma_initializer=initializer, + moving_variance_initializer=initializer, + gamma_regularizer=self.weight_regularizer, + beta_regularizer=self.weight_regularizer, + name=name, + ) + + def _batch_norm_relu(self, inputs, name=None, last=False): + # if self.fuse_bn_relu: + # initializer = flow.zeros_initializer() if last else flow.ones_initializer() + # axis = 1 + # if self.data_format == "NHWC": + # axis = 3 + # return flow.layers.batch_normalization_relu( + # inputs=inputs, + # axis=axis, + # momentum=0.9, + # epsilon=1e-5, + # center=True, + # scale=True, + # trainable=self.trainable, + # training=self.training, + # gamma_initializer=initializer, + # moving_variance_initializer=initializer, + # gamma_regularizer=self.weight_regularizer, + # beta_regularizer=self.weight_regularizer, + # name=name + "_bn_relu", + # ) + # else: + # return flow.nn.relu(self._batch_norm(inputs, name + "_bn", last=last)) + return flow.nn.relu(self._batch_norm(inputs, name, last=last)) + + def _batch_norm_add_relu(self, inputs, addend, name=None, last=False): + # if self.fuse_bn_add_relu: + # initializer = flow.zeros_initializer() if last else flow.ones_initializer() + # axis = 1 + # if self.data_format == "NHWC": + # axis = 3 + # return flow.layers.batch_normalization_add_relu( + # inputs=inputs, + # addend=addend, + # axis=axis, + # momentum=0.9, + # 
epsilon=1e-5, + # center=True, + # scale=True, + # trainable=self.trainable, + # training=self.training, + # gamma_initializer=initializer, + # moving_variance_initializer=initializer, + # gamma_regularizer=self.weight_regularizer, + # beta_regularizer=self.weight_regularizer, + # name=name + "_bn_add_relu", + # ) + # else: + return flow.nn.relu( + self._batch_norm(inputs, name, last=last) + addend + ) + + def conv2d_affine(self, input, name, filters, kernel_size, strides): + padding = "SAME" if strides > 1 or kernel_size > 1 else "VALID" + output = self._conv2d(name, input, filters, kernel_size, strides, padding) + return output + + def bottleneck_transformation( + self, input, block_name, filters, filters_inner, strides + ): + a = self.conv2d_affine(input, block_name + ".conv1", filters_inner, 1, 1) + a = self._batch_norm_relu(a, block_name + ".bn1") + + b = self.conv2d_affine(a, block_name + ".conv2", filters_inner, 3, strides) + b = self._batch_norm_relu(b, block_name + ".bn2") + + c = self.conv2d_affine(b, block_name + ".conv3", filters, 1, 1) + return c + + def residual_block(self, input, block_name, filters, filters_inner, strides_init): + if strides_init != 1 or block_name == "layer1.0": + shortcut = self.conv2d_affine( + input, block_name + ".downsample.0", filters, 1, strides_init + ) + shortcut = self._batch_norm(shortcut, block_name + ".downsample.1") + else: + shortcut = input + + bottleneck = self.bottleneck_transformation( + input, block_name, filters, filters_inner, strides_init, + ) + return self._batch_norm_add_relu( + bottleneck, shortcut, block_name + ".bn3", last=True + ) + + def residual_stage( + self, input, stage_name, counts, filters, filters_inner, stride_init=2 + ): + output = input + for i in range(counts): + block_name = "%s.%d" % (stage_name, i) + output = self.residual_block( + output, block_name, filters, filters_inner, stride_init if i == 0 else 1 + ) + + return output + + def resnet_conv_x_body(self, input): + output = input + for i, (counts, filters, filters_inner) in enumerate( + zip(BLOCK_COUNTS, BLOCK_FILTERS, BLOCK_FILTERS_INNER) + ): + stage_name = "layer%d" % (i + 1) + output = self.residual_stage( + output, stage_name, counts, filters, filters_inner, 1 if i == 0 else 2 + ) + + return output + + def resnet_stem(self, input): + conv1 = self._conv2d("conv1", input, 64, 7, 2) + conv1_bn = self._batch_norm_relu(conv1, "bn1") + pool1 = flow.nn.max_pool2d( + conv1_bn, + ksize=3, + strides=2, + padding="SAME", + data_format=self.data_format, + name="pool1", + ) + return pool1 + + +def resnet50(images, args, trainable=True, training=True): + weight_regularizer = ( + flow.regularizers.l2(args.wd) if args.wd > 0.0 and args.wd < 1.0 else None + ) + builder = ResnetBuilder( + weight_regularizer, + trainable, + training, + args.channel_last, + args.fuse_bn_relu, + args.fuse_bn_add_relu, + ) + + # if args.pad_output: + # if args.channel_last: + # paddings = ((0, 0), (0, 0), (0, 0), (0, 1)) + # else: + # paddings = ((0, 0), (0, 1), (0, 0), (0, 0)) + # images = flow.pad(images, paddings=paddings) + # with flow.scope.namespace("resnet50"): + stem = builder.resnet_stem(images) + body = builder.resnet_conv_x_body(stem) + pool5 = flow.nn.avg_pool2d( + body, + ksize=7, + strides=1, + padding="VALID", + data_format=builder.data_format, + name="avgpool", + ) + fc1001 = flow.layers.dense( + flow.reshape(pool5, (pool5.shape[0], -1)), + units=1000, + use_bias=True, + kernel_initializer=flow.variance_scaling_initializer( + 2, "fan_in", "random_normal" + ), + 
bias_initializer=flow.zeros_initializer(),
+        kernel_regularizer=weight_regularizer,
+        bias_regularizer=weight_regularizer,
+        trainable=trainable,
+        name="fc",
+    )
+    return fc1001
+

From 55921dbc35eea9b9a1648dc137d62a2e9ca9710b Mon Sep 17 00:00:00 2001
From: ShawnXuan
Date: Thu, 26 Aug 2021 14:42:11 +0800
Subject: [PATCH 3/7] compare with eager

---
 Classification/cnns/align.sh            | 56 +++++++++++++++++++++++++
 Classification/cnns/of_cnn_train_val.py | 18 ++++++--
 Classification/cnns/optimizer_util.py   |  7 +++-
 Classification/cnns/resnet_rename.py    |  4 ++
 Classification/cnns/util.py             | 30 ++++++++++++-
 5 files changed, 109 insertions(+), 6 deletions(-)
 create mode 100755 Classification/cnns/align.sh

diff --git a/Classification/cnns/align.sh b/Classification/cnns/align.sh
new file mode 100755
index 0000000..ca6cc06
--- /dev/null
+++ b/Classification/cnns/align.sh
@@ -0,0 +1,56 @@
+rm -rf core.*
+rm -rf ./output/snapshots/*
+
+if [ -n "$1" ]; then
+    NUM_EPOCH=$1
+else
+    NUM_EPOCH=50
+fi
+echo NUM_EPOCH=$NUM_EPOCH
+
+# training with imagenet
+if [ -n "$2" ]; then
+    DATA_ROOT=$2
+else
+    DATA_ROOT=/dataset/ImageNet/ofrecord
+fi
+echo DATA_ROOT=$DATA_ROOT
+
+LOG_FOLDER=../logs
+mkdir -p $LOG_FOLDER
+LOGFILE=$LOG_FOLDER/resnet_training.log
+
+export PYTHONUNBUFFERED=1
+echo PYTHONUNBUFFERED=$PYTHONUNBUFFERED
+export NCCL_LAUNCH_MODE=PARALLEL
+echo NCCL_LAUNCH_MODE=$NCCL_LAUNCH_MODE
+
+    #--momentum=0.875 \
+python3 of_cnn_train_val.py \
+    --train_data_dir=$DATA_ROOT/train \
+    --train_data_part_num=256 \
+    --val_data_dir=$DATA_ROOT/validation \
+    --val_data_part_num=256 \
+    --num_nodes=1 \
+    --model_load_dir=/ssd/xiexuan/models/resnet50/init_ckpt \
+    --gpu_num_per_node=1 \
+    --optimizer="sgd" \
+    --momentum=0.0 \
+    --lr_decay="none" \
+    --label_smoothing=0.1 \
+    --learning_rate=0.1 \
+    --loss_print_every_n_iter=1 \
+    --batch_size_per_device=64 \
+    --val_batch_size_per_device=64 \
+    --channel_last=False \
+    --pad_output \
+    --fuse_bn_relu=True \
+    --fuse_bn_add_relu=True \
+    --nccl_fusion_threshold_mb=16 \
+    --nccl_fusion_max_ops=24 \
+    --gpu_image_decoder=True \
+    --num_epoch=$NUM_EPOCH \
+    --model="resnet50" 2>&1 | tee ${LOGFILE}
+    # --use_fp16 \
+
+echo "Writting log to ${LOGFILE}"
diff --git a/Classification/cnns/of_cnn_train_val.py b/Classification/cnns/of_cnn_train_val.py
index 3258849..6ea6864 100755
--- a/Classification/cnns/of_cnn_train_val.py
+++ b/Classification/cnns/of_cnn_train_val.py
@@ -28,6 +28,7 @@ import alexnet_model
 import inception_model
 import mobilenet_v2_model
+from util import build_watch_cb, build_watch_diff_cb
 
 parser = configs.get_parser()
 args = parser.parse_args()
@@ -52,7 +53,7 @@
 
 flow.config.gpu_device_num(args.gpu_num_per_node)
-# flow.config.enable_debug_mode(True)
+flow.config.enable_debug_mode(True)
 if args.use_fp16 and args.num_nodes * args.gpu_num_per_node > 1:
     flow.config.collective_boxing.nccl_fusion_all_reduce_use_buffer(False)
@@ -85,12 +86,15 @@ def TrainNet():
     if args.train_data_dir:
         assert os.path.exists(args.train_data_dir)
         print("Loading data from {}".format(args.train_data_dir))
-        (labels, images) = ofrecord_util.load_imagenet_for_training(args)
+        #(labels, images) = ofrecord_util.load_imagenet_for_training(args)
+        (labels, images) = ofrecord_util.load_imagenet_for_validation(args)
     else:
         print("Loading synthetic data.")
         (labels, images) = ofrecord_util.load_synthetic(args)
 
     logits = model_dict[args.model](images, args)
+    flow.watch(logits, build_watch_cb('logits'))
+    flow.watch_diff(logits, build_watch_diff_cb('logits_grad'))
     if args.label_smoothing > 0:
one_hot_labels = label_smoothing( labels, args.num_classes, args.label_smoothing, logits.dtype @@ -105,7 +109,7 @@ def TrainNet(): loss = flow.math.reduce_mean(loss) predictions = flow.nn.softmax(logits) - outputs = {"loss": loss, "predictions": predictions, "labels": labels} + outputs = {"loss": loss, "predictions": predictions, "labels": labels, 'images': images, 'logits': logits} # set up warmup,learning rate and optimizer optimizer_util.set_up_optimizer(loss, args) @@ -145,7 +149,13 @@ def main(): loss_key="loss", ) for i in range(epoch_size): - TrainNet().async_get(metric.metric_cb(epoch, i)) + # TrainNet().async_get(metric.metric_cb(epoch, i)) + a = TrainNet().get() + snapshot.save("epoch_{}_iter{}".format(epoch, i)) + print(a['loss'].numpy()) + if i>=1: + break + break if args.val_data_dir: metric = Metric( diff --git a/Classification/cnns/optimizer_util.py b/Classification/cnns/optimizer_util.py index 43cd977..a5f4193 100755 --- a/Classification/cnns/optimizer_util.py +++ b/Classification/cnns/optimizer_util.py @@ -116,6 +116,11 @@ def set_up_optimizer(loss, args): staircase=False, warmup=warmup, ) + elif args.lr_decay == "none": + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler( + boundaries=[], + values=[args.learning_rate], + ) else: lr_scheduler = flow.optimizer.PiecewiseScalingScheduler( base_lr=args.learning_rate, @@ -134,7 +139,7 @@ def set_up_optimizer(loss, args): print("Optimizer: SGD") flow.optimizer.SGD( lr_scheduler, - momentum=args.momentum if args.momentum > 0 else None, + momentum=args.momentum if args.momentum > 0 else 0.0, grad_clipping=grad_clipping, loss_scale_policy=loss_scale_policy, ).minimize(loss) diff --git a/Classification/cnns/resnet_rename.py b/Classification/cnns/resnet_rename.py index 117dad4..eac8c7a 100644 --- a/Classification/cnns/resnet_rename.py +++ b/Classification/cnns/resnet_rename.py @@ -15,6 +15,7 @@ """ import oneflow.compatible.single_client as flow +from util import build_watch_cb, build_watch_diff_cb BLOCK_COUNTS = [3, 4, 6, 3] BLOCK_FILTERS = [256, 512, 1024, 2048] @@ -58,6 +59,9 @@ def _conv2d( model_name="weight", trainable=self.trainable, ) + if 'conv1' == name: + flow.watch(weight, build_watch_cb('conv1_weight')) + flow.watch_diff(weight, build_watch_diff_cb('conv1_weight_grad')) return flow.nn.conv2d( input, diff --git a/Classification/cnns/util.py b/Classification/cnns/util.py index 6b7ce9a..c865cec 100755 --- a/Classification/cnns/util.py +++ b/Classification/cnns/util.py @@ -41,7 +41,8 @@ def __init__(self, model_save_dir, model_load_dir): if model_load_dir: assert os.path.isdir(model_load_dir) print("Restoring model from {}.".format(model_load_dir)) - flow.load_variables(flow.checkpoint.get(model_load_dir)) + flow.load_variables(flow.checkpoint.get(model_load_dir), ignore_mismatch=False) + # flow.checkpoint.save('loaded_init_ckpt') else: # flow.checkpoint.save("initial_model") print("Init model on demand.") @@ -84,6 +85,15 @@ def match_top_k(predictions, labels, top_k=1): return num_matched, match_array.shape[0] +def dump_outputs(outputs, step, dump_dir='output'): + for k, v in outputs.items(): + root = os.path.join(dump_dir, str(step)) + if not os.path.isdir(root): + os.makedirs(root) + path = os.path.join(root, k) + np.save(path, v.numpy()) + + class Metric(object): def __init__( self, @@ -142,6 +152,7 @@ def callback(outputs): self.num_samples += num_samples if (step + 1) % self.calculate_batches == 0: + dump_outputs(outputs, step) throughput = self.num_samples / self.timer.split() if self.prediction_key: 
top_1_accuracy = self.top_1_num_matched / self.num_samples @@ -180,3 +191,20 @@ def callback(outputs): self._clear() return callback + + +from oneflow.compatible.single_client import typing as tp + +def build_watch_cb(name, iter=0, root='output'): + path = os.path.join(root, str(iter), f'{name}.npy') + def cb(blob: tp.Numpy): + np.save(path, blob) + return cb + + +def build_watch_diff_cb(name, iter=0, root='output'): + path = os.path.join(root, str(iter), f'{name}_grad.npy') + def cb(blob: tp.Numpy): + np.save(path, blob) + return cb + From d8a281d0bb7e83b3e16f31bb17f32e374d8c2f56 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 26 Aug 2021 14:52:13 +0800 Subject: [PATCH 4/7] train.sh --- Classification/cnns/train.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Classification/cnns/train.sh b/Classification/cnns/train.sh index 5a78d34..5444cf4 100755 --- a/Classification/cnns/train.sh +++ b/Classification/cnns/train.sh @@ -19,7 +19,6 @@ echo DATA_ROOT=$DATA_ROOT LOG_FOLDER=../logs mkdir -p $LOG_FOLDER LOGFILE=$LOG_FOLDER/resnet_training.log -export PYTHONUNBUFFERED=1 python3 of_cnn_train_val.py \ --train_data_dir=$DATA_ROOT/train \ @@ -27,7 +26,7 @@ python3 of_cnn_train_val.py \ --val_data_dir=$DATA_ROOT/validation \ --val_data_part_num=256 \ --num_nodes=1 \ - --gpu_num_per_node=4 \ + --gpu_num_per_node=8 \ --optimizer="sgd" \ --momentum=0.875 \ --label_smoothing=0.1 \ @@ -37,6 +36,5 @@ python3 of_cnn_train_val.py \ --val_batch_size_per_device=50 \ --num_epoch=$NUM_EPOCH \ --model="resnet50" 2>&1 | tee ${LOGFILE} - #--model="resnet50" 2>&1 | tee ${LOGFILE} echo "Writting log to ${LOGFILE}" From 4a4cbda064a52491aa4a028aab1f4aef67d5fc7d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 26 Aug 2021 14:55:23 +0800 Subject: [PATCH 5/7] manual modify --- Classification/cnns/train.sh | 2 +- Classification/cnns/train_fp16.sh | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Classification/cnns/train.sh b/Classification/cnns/train.sh index 5444cf4..6aa2b80 100755 --- a/Classification/cnns/train.sh +++ b/Classification/cnns/train.sh @@ -32,7 +32,7 @@ python3 of_cnn_train_val.py \ --label_smoothing=0.1 \ --learning_rate=1.024 \ --loss_print_every_n_iter=100 \ - --batch_size_per_device=32 \ + --batch_size_per_device=128 \ --val_batch_size_per_device=50 \ --num_epoch=$NUM_EPOCH \ --model="resnet50" 2>&1 | tee ${LOGFILE} diff --git a/Classification/cnns/train_fp16.sh b/Classification/cnns/train_fp16.sh index 0c59ef0..7ecfa5c 100755 --- a/Classification/cnns/train_fp16.sh +++ b/Classification/cnns/train_fp16.sh @@ -26,14 +26,18 @@ export NCCL_LAUNCH_MODE=PARALLEL echo NCCL_LAUNCH_MODE=$NCCL_LAUNCH_MODE python3 of_cnn_train_val.py \ + --train_data_dir=$DATA_ROOT/train \ + --train_data_part_num=256 \ + --val_data_dir=$DATA_ROOT/validation \ + --val_data_part_num=256 \ --num_nodes=1 \ - --gpu_num_per_node=1 \ + --gpu_num_per_node=8 \ --optimizer="sgd" \ --momentum=0.875 \ --label_smoothing=0.1 \ --learning_rate=1.536 \ --loss_print_every_n_iter=100 \ - --batch_size_per_device=64 \ + --batch_size_per_device=192 \ --val_batch_size_per_device=50 \ --use_fp16 \ --channel_last=True \ From b95528d31131993edc6b6aba5829771f8fbb2150 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 26 Aug 2021 16:10:13 +0800 Subject: [PATCH 6/7] fix conv algo --- Classification/cnns/align.sh | 6 +- Classification/cnns/job_function_util.py | 3 + Classification/cnns/resnet_rename.py | 107 +++++++++++------------ 3 files changed, 59 insertions(+), 57 deletions(-) diff --git 
a/Classification/cnns/align.sh b/Classification/cnns/align.sh index ca6cc06..88d2273 100755 --- a/Classification/cnns/align.sh +++ b/Classification/cnns/align.sh @@ -43,14 +43,14 @@ python3 of_cnn_train_val.py \ --batch_size_per_device=64 \ --val_batch_size_per_device=64 \ --channel_last=False \ - --pad_output \ - --fuse_bn_relu=True \ - --fuse_bn_add_relu=True \ + --fuse_bn_relu=False \ + --fuse_bn_add_relu=False \ --nccl_fusion_threshold_mb=16 \ --nccl_fusion_max_ops=24 \ --gpu_image_decoder=True \ --num_epoch=$NUM_EPOCH \ --model="resnet50" 2>&1 | tee ${LOGFILE} # --use_fp16 \ + #--pad_output \ echo "Writting log to ${LOGFILE}" diff --git a/Classification/cnns/job_function_util.py b/Classification/cnns/job_function_util.py index c651e95..3eaafba 100755 --- a/Classification/cnns/job_function_util.py +++ b/Classification/cnns/job_function_util.py @@ -26,6 +26,9 @@ def _default_config(args): if args.use_xla: config.use_xla_jit(True) config.enable_fuse_add_to_output(True) + config.cudnn_conv_force_fwd_algo(0) + config.cudnn_conv_force_bwd_data_algo(1) + config.cudnn_conv_force_bwd_filter_algo(1) return config diff --git a/Classification/cnns/resnet_rename.py b/Classification/cnns/resnet_rename.py index eac8c7a..784f924 100644 --- a/Classification/cnns/resnet_rename.py +++ b/Classification/cnns/resnet_rename.py @@ -96,56 +96,55 @@ def _batch_norm(self, inputs, name=None, last=False): ) def _batch_norm_relu(self, inputs, name=None, last=False): - # if self.fuse_bn_relu: - # initializer = flow.zeros_initializer() if last else flow.ones_initializer() - # axis = 1 - # if self.data_format == "NHWC": - # axis = 3 - # return flow.layers.batch_normalization_relu( - # inputs=inputs, - # axis=axis, - # momentum=0.9, - # epsilon=1e-5, - # center=True, - # scale=True, - # trainable=self.trainable, - # training=self.training, - # gamma_initializer=initializer, - # moving_variance_initializer=initializer, - # gamma_regularizer=self.weight_regularizer, - # beta_regularizer=self.weight_regularizer, - # name=name + "_bn_relu", - # ) - # else: - # return flow.nn.relu(self._batch_norm(inputs, name + "_bn", last=last)) - return flow.nn.relu(self._batch_norm(inputs, name, last=last)) + if self.fuse_bn_relu: + initializer = flow.zeros_initializer() if last else flow.ones_initializer() + axis = 1 + if self.data_format == "NHWC": + axis = 3 + return flow.layers.batch_normalization_relu( + inputs=inputs, + axis=axis, + momentum=0.9, + epsilon=1e-5, + center=True, + scale=True, + trainable=self.trainable, + training=self.training, + gamma_initializer=initializer, + moving_variance_initializer=initializer, + gamma_regularizer=self.weight_regularizer, + beta_regularizer=self.weight_regularizer, + name=name + "_bn_relu", + ) + else: + return flow.nn.relu(self._batch_norm(inputs, name, last=last)) def _batch_norm_add_relu(self, inputs, addend, name=None, last=False): - # if self.fuse_bn_add_relu: - # initializer = flow.zeros_initializer() if last else flow.ones_initializer() - # axis = 1 - # if self.data_format == "NHWC": - # axis = 3 - # return flow.layers.batch_normalization_add_relu( - # inputs=inputs, - # addend=addend, - # axis=axis, - # momentum=0.9, - # epsilon=1e-5, - # center=True, - # scale=True, - # trainable=self.trainable, - # training=self.training, - # gamma_initializer=initializer, - # moving_variance_initializer=initializer, - # gamma_regularizer=self.weight_regularizer, - # beta_regularizer=self.weight_regularizer, - # name=name + "_bn_add_relu", - # ) - # else: - return flow.nn.relu( - 
self._batch_norm(inputs, name, last=last) + addend - ) + if self.fuse_bn_add_relu: + initializer = flow.zeros_initializer() if last else flow.ones_initializer() + axis = 1 + if self.data_format == "NHWC": + axis = 3 + return flow.layers.batch_normalization_add_relu( + inputs=inputs, + addend=addend, + axis=axis, + momentum=0.9, + epsilon=1e-5, + center=True, + scale=True, + trainable=self.trainable, + training=self.training, + gamma_initializer=initializer, + moving_variance_initializer=initializer, + gamma_regularizer=self.weight_regularizer, + beta_regularizer=self.weight_regularizer, + name=name + "_bn_add_relu", + ) + else: + return flow.nn.relu( + self._batch_norm(inputs, name, last=last) + addend + ) def conv2d_affine(self, input, name, filters, kernel_size, strides): padding = "SAME" if strides > 1 or kernel_size > 1 else "VALID" @@ -231,12 +230,12 @@ def resnet50(images, args, trainable=True, training=True): args.fuse_bn_add_relu, ) - # if args.pad_output: - # if args.channel_last: - # paddings = ((0, 0), (0, 0), (0, 0), (0, 1)) - # else: - # paddings = ((0, 0), (0, 1), (0, 0), (0, 0)) - # images = flow.pad(images, paddings=paddings) + if args.pad_output: + if args.channel_last: + paddings = ((0, 0), (0, 0), (0, 0), (0, 1)) + else: + paddings = ((0, 0), (0, 1), (0, 0), (0, 0)) + images = flow.pad(images, paddings=paddings) # with flow.scope.namespace("resnet50"): stem = builder.resnet_stem(images) body = builder.resnet_conv_x_body(stem) From 708e293ccf851fc45e5790b1830186247a3a6a1f Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Thu, 26 Aug 2021 16:16:50 +0800 Subject: [PATCH 7/7] rm rename.py --- Classification/cnns/of_cnn_train_val.py | 9 +- Classification/cnns/resnet_model.py | 83 ++++---- Classification/cnns/resnet_rename.py | 264 ------------------------ 3 files changed, 48 insertions(+), 308 deletions(-) delete mode 100644 Classification/cnns/resnet_rename.py diff --git a/Classification/cnns/of_cnn_train_val.py b/Classification/cnns/of_cnn_train_val.py index 6ea6864..440fbf9 100755 --- a/Classification/cnns/of_cnn_train_val.py +++ b/Classification/cnns/of_cnn_train_val.py @@ -21,8 +21,7 @@ import config as configs from util import Snapshot, InitNodes, Metric from job_function_util import get_train_config, get_val_config -# import resnet_model -import resnet_rename as resnet_model +import resnet_model import resnext_model import vgg_model import alexnet_model @@ -151,9 +150,9 @@ def main(): for i in range(epoch_size): # TrainNet().async_get(metric.metric_cb(epoch, i)) a = TrainNet().get() - snapshot.save("epoch_{}_iter{}".format(epoch, i)) - print(a['loss'].numpy()) - if i>=1: + # snapshot.save("epoch_{}_iter{}".format(epoch, i)) + print('loss:', a['loss'].numpy()) + if i>=100: break break diff --git a/Classification/cnns/resnet_model.py b/Classification/cnns/resnet_model.py index 7e9c1fc..784f924 100755 --- a/Classification/cnns/resnet_model.py +++ b/Classification/cnns/resnet_model.py @@ -15,6 +15,7 @@ """ import oneflow.compatible.single_client as flow +from util import build_watch_cb, build_watch_diff_cb BLOCK_COUNTS = [3, 4, 6, 3] BLOCK_FILTERS = [256, 512, 1024, 2048] @@ -50,7 +51,7 @@ def _conv2d( else: shape = (filters, input.shape[1], kernel_size, kernel_size) weight = flow.get_variable( - name + "-weight", + name + ".weight", shape=shape, dtype=input.dtype, initializer=self.weight_initializer, @@ -58,6 +59,9 @@ def _conv2d( model_name="weight", trainable=self.trainable, ) + if 'conv1' == name: + flow.watch(weight, build_watch_cb('conv1_weight')) + 
flow.watch_diff(weight, build_watch_diff_cb('conv1_weight_grad')) return flow.nn.conv2d( input, @@ -113,7 +117,7 @@ def _batch_norm_relu(self, inputs, name=None, last=False): name=name + "_bn_relu", ) else: - return flow.nn.relu(self._batch_norm(inputs, name + "_bn", last=last)) + return flow.nn.relu(self._batch_norm(inputs, name, last=last)) def _batch_norm_add_relu(self, inputs, addend, name=None, last=False): if self.fuse_bn_add_relu: @@ -139,7 +143,7 @@ def _batch_norm_add_relu(self, inputs, addend, name=None, last=False): ) else: return flow.nn.relu( - self._batch_norm(inputs, name + "_bn", last=last) + addend + self._batch_norm(inputs, name, last=last) + addend ) def conv2d_affine(self, input, name, filters, kernel_size, strides): @@ -150,21 +154,21 @@ def conv2d_affine(self, input, name, filters, kernel_size, strides): def bottleneck_transformation( self, input, block_name, filters, filters_inner, strides ): - a = self.conv2d_affine(input, block_name + "_branch2a", filters_inner, 1, 1) - a = self._batch_norm_relu(a, block_name + "_branch2a") + a = self.conv2d_affine(input, block_name + ".conv1", filters_inner, 1, 1) + a = self._batch_norm_relu(a, block_name + ".bn1") - b = self.conv2d_affine(a, block_name + "_branch2b", filters_inner, 3, strides) - b = self._batch_norm_relu(b, block_name + "_branch2b") + b = self.conv2d_affine(a, block_name + ".conv2", filters_inner, 3, strides) + b = self._batch_norm_relu(b, block_name + ".bn2") - c = self.conv2d_affine(b, block_name + "_branch2c", filters, 1, 1) + c = self.conv2d_affine(b, block_name + ".conv3", filters, 1, 1) return c def residual_block(self, input, block_name, filters, filters_inner, strides_init): - if strides_init != 1 or block_name == "res2_0": + if strides_init != 1 or block_name == "layer1.0": shortcut = self.conv2d_affine( - input, block_name + "_branch1", filters, 1, strides_init + input, block_name + ".downsample.0", filters, 1, strides_init ) - shortcut = self._batch_norm(shortcut, block_name + "_branch1_bn") + shortcut = self._batch_norm(shortcut, block_name + ".downsample.1") else: shortcut = input @@ -172,7 +176,7 @@ def residual_block(self, input, block_name, filters, filters_inner, strides_init input, block_name, filters, filters_inner, strides_init, ) return self._batch_norm_add_relu( - bottleneck, shortcut, block_name + "_branch2c", last=True + bottleneck, shortcut, block_name + ".bn3", last=True ) def residual_stage( @@ -180,7 +184,7 @@ def residual_stage( ): output = input for i in range(counts): - block_name = "%s_%d" % (stage_name, i) + block_name = "%s.%d" % (stage_name, i) output = self.residual_block( output, block_name, filters, filters_inner, stride_init if i == 0 else 1 ) @@ -192,7 +196,7 @@ def resnet_conv_x_body(self, input): for i, (counts, filters, filters_inner) in enumerate( zip(BLOCK_COUNTS, BLOCK_FILTERS, BLOCK_FILTERS_INNER) ): - stage_name = "res%d" % (i + 2) + stage_name = "layer%d" % (i + 1) output = self.residual_stage( output, stage_name, counts, filters, filters_inner, 1 if i == 0 else 2 ) @@ -201,7 +205,7 @@ def resnet_conv_x_body(self, input): def resnet_stem(self, input): conv1 = self._conv2d("conv1", input, 64, 7, 2) - conv1_bn = self._batch_norm_relu(conv1, "conv1") + conv1_bn = self._batch_norm_relu(conv1, "bn1") pool1 = flow.nn.max_pool2d( conv1_bn, ksize=3, @@ -232,28 +236,29 @@ def resnet50(images, args, trainable=True, training=True): else: paddings = ((0, 0), (0, 1), (0, 0), (0, 0)) images = flow.pad(images, paddings=paddings) - with flow.scope.namespace("Resnet"): - stem = 
builder.resnet_stem(images) - body = builder.resnet_conv_x_body(stem) - pool5 = flow.nn.avg_pool2d( - body, - ksize=7, - strides=1, - padding="VALID", - data_format=builder.data_format, - name="pool5", - ) - fc1001 = flow.layers.dense( - flow.reshape(pool5, (pool5.shape[0], -1)), - units=1000, - use_bias=True, - kernel_initializer=flow.variance_scaling_initializer( - 2, "fan_in", "random_normal" - ), - bias_initializer=flow.zeros_initializer(), - kernel_regularizer=weight_regularizer, - bias_regularizer=weight_regularizer, - trainable=trainable, - name="fc1001", - ) + # with flow.scope.namespace("resnet50"): + stem = builder.resnet_stem(images) + body = builder.resnet_conv_x_body(stem) + pool5 = flow.nn.avg_pool2d( + body, + ksize=7, + strides=1, + padding="VALID", + data_format=builder.data_format, + name="avgpool", + ) + fc1001 = flow.layers.dense( + flow.reshape(pool5, (pool5.shape[0], -1)), + units=1000, + use_bias=True, + kernel_initializer=flow.variance_scaling_initializer( + 2, "fan_in", "random_normal" + ), + bias_initializer=flow.zeros_initializer(), + kernel_regularizer=weight_regularizer, + bias_regularizer=weight_regularizer, + trainable=trainable, + name="fc", + ) return fc1001 + diff --git a/Classification/cnns/resnet_rename.py b/Classification/cnns/resnet_rename.py deleted file mode 100644 index 784f924..0000000 --- a/Classification/cnns/resnet_rename.py +++ /dev/null @@ -1,264 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" - -import oneflow.compatible.single_client as flow -from util import build_watch_cb, build_watch_diff_cb - -BLOCK_COUNTS = [3, 4, 6, 3] -BLOCK_FILTERS = [256, 512, 1024, 2048] -BLOCK_FILTERS_INNER = [64, 128, 256, 512] - - -class ResnetBuilder(object): - def __init__( - self, - weight_regularizer, - trainable=True, - training=True, - channel_last=False, - fuse_bn_relu=True, - fuse_bn_add_relu=True, - ): - self.data_format = "NHWC" if channel_last else "NCHW" - self.weight_initializer = flow.variance_scaling_initializer( - 2, "fan_in", "random_normal", data_format=self.data_format - ) - self.weight_regularizer = weight_regularizer - self.trainable = trainable - self.training = training - self.fuse_bn_relu = fuse_bn_relu - self.fuse_bn_add_relu = fuse_bn_add_relu - - def _conv2d( - self, name, input, filters, kernel_size, strides=1, padding="SAME", dilations=1, - ): - # There are different shapes of weight metric between 'NCHW' and 'NHWC' mode - if self.data_format == "NHWC": - shape = (filters, kernel_size, kernel_size, input.shape[3]) - else: - shape = (filters, input.shape[1], kernel_size, kernel_size) - weight = flow.get_variable( - name + ".weight", - shape=shape, - dtype=input.dtype, - initializer=self.weight_initializer, - regularizer=self.weight_regularizer, - model_name="weight", - trainable=self.trainable, - ) - if 'conv1' == name: - flow.watch(weight, build_watch_cb('conv1_weight')) - flow.watch_diff(weight, build_watch_diff_cb('conv1_weight_grad')) - - return flow.nn.conv2d( - input, - weight, - strides, - padding, - None, - self.data_format, - dilations, - name=name, - ) - - def _batch_norm(self, inputs, name=None, last=False): - initializer = flow.zeros_initializer() if last else flow.ones_initializer() - axis = 1 - if self.data_format == "NHWC": - axis = 3 - return flow.layers.batch_normalization( - inputs=inputs, - axis=axis, - momentum=0.9, # 97, - epsilon=1e-5, - center=True, - scale=True, - trainable=self.trainable, - training=self.training, - gamma_initializer=initializer, - moving_variance_initializer=initializer, - gamma_regularizer=self.weight_regularizer, - beta_regularizer=self.weight_regularizer, - name=name, - ) - - def _batch_norm_relu(self, inputs, name=None, last=False): - if self.fuse_bn_relu: - initializer = flow.zeros_initializer() if last else flow.ones_initializer() - axis = 1 - if self.data_format == "NHWC": - axis = 3 - return flow.layers.batch_normalization_relu( - inputs=inputs, - axis=axis, - momentum=0.9, - epsilon=1e-5, - center=True, - scale=True, - trainable=self.trainable, - training=self.training, - gamma_initializer=initializer, - moving_variance_initializer=initializer, - gamma_regularizer=self.weight_regularizer, - beta_regularizer=self.weight_regularizer, - name=name + "_bn_relu", - ) - else: - return flow.nn.relu(self._batch_norm(inputs, name, last=last)) - - def _batch_norm_add_relu(self, inputs, addend, name=None, last=False): - if self.fuse_bn_add_relu: - initializer = flow.zeros_initializer() if last else flow.ones_initializer() - axis = 1 - if self.data_format == "NHWC": - axis = 3 - return flow.layers.batch_normalization_add_relu( - inputs=inputs, - addend=addend, - axis=axis, - momentum=0.9, - epsilon=1e-5, - center=True, - scale=True, - trainable=self.trainable, - training=self.training, - gamma_initializer=initializer, - moving_variance_initializer=initializer, - gamma_regularizer=self.weight_regularizer, - beta_regularizer=self.weight_regularizer, - name=name + "_bn_add_relu", - ) - else: - return flow.nn.relu( - 
self._batch_norm(inputs, name, last=last) + addend - ) - - def conv2d_affine(self, input, name, filters, kernel_size, strides): - padding = "SAME" if strides > 1 or kernel_size > 1 else "VALID" - output = self._conv2d(name, input, filters, kernel_size, strides, padding) - return output - - def bottleneck_transformation( - self, input, block_name, filters, filters_inner, strides - ): - a = self.conv2d_affine(input, block_name + ".conv1", filters_inner, 1, 1) - a = self._batch_norm_relu(a, block_name + ".bn1") - - b = self.conv2d_affine(a, block_name + ".conv2", filters_inner, 3, strides) - b = self._batch_norm_relu(b, block_name + ".bn2") - - c = self.conv2d_affine(b, block_name + ".conv3", filters, 1, 1) - return c - - def residual_block(self, input, block_name, filters, filters_inner, strides_init): - if strides_init != 1 or block_name == "layer1.0": - shortcut = self.conv2d_affine( - input, block_name + ".downsample.0", filters, 1, strides_init - ) - shortcut = self._batch_norm(shortcut, block_name + ".downsample.1") - else: - shortcut = input - - bottleneck = self.bottleneck_transformation( - input, block_name, filters, filters_inner, strides_init, - ) - return self._batch_norm_add_relu( - bottleneck, shortcut, block_name + ".bn3", last=True - ) - - def residual_stage( - self, input, stage_name, counts, filters, filters_inner, stride_init=2 - ): - output = input - for i in range(counts): - block_name = "%s.%d" % (stage_name, i) - output = self.residual_block( - output, block_name, filters, filters_inner, stride_init if i == 0 else 1 - ) - - return output - - def resnet_conv_x_body(self, input): - output = input - for i, (counts, filters, filters_inner) in enumerate( - zip(BLOCK_COUNTS, BLOCK_FILTERS, BLOCK_FILTERS_INNER) - ): - stage_name = "layer%d" % (i + 1) - output = self.residual_stage( - output, stage_name, counts, filters, filters_inner, 1 if i == 0 else 2 - ) - - return output - - def resnet_stem(self, input): - conv1 = self._conv2d("conv1", input, 64, 7, 2) - conv1_bn = self._batch_norm_relu(conv1, "bn1") - pool1 = flow.nn.max_pool2d( - conv1_bn, - ksize=3, - strides=2, - padding="SAME", - data_format=self.data_format, - name="pool1", - ) - return pool1 - - -def resnet50(images, args, trainable=True, training=True): - weight_regularizer = ( - flow.regularizers.l2(args.wd) if args.wd > 0.0 and args.wd < 1.0 else None - ) - builder = ResnetBuilder( - weight_regularizer, - trainable, - training, - args.channel_last, - args.fuse_bn_relu, - args.fuse_bn_add_relu, - ) - - if args.pad_output: - if args.channel_last: - paddings = ((0, 0), (0, 0), (0, 0), (0, 1)) - else: - paddings = ((0, 0), (0, 1), (0, 0), (0, 0)) - images = flow.pad(images, paddings=paddings) - # with flow.scope.namespace("resnet50"): - stem = builder.resnet_stem(images) - body = builder.resnet_conv_x_body(stem) - pool5 = flow.nn.avg_pool2d( - body, - ksize=7, - strides=1, - padding="VALID", - data_format=builder.data_format, - name="avgpool", - ) - fc1001 = flow.layers.dense( - flow.reshape(pool5, (pool5.shape[0], -1)), - units=1000, - use_bias=True, - kernel_initializer=flow.variance_scaling_initializer( - 2, "fan_in", "random_normal" - ), - bias_initializer=flow.zeros_initializer(), - kernel_regularizer=weight_regularizer, - bias_regularizer=weight_regularizer, - trainable=trainable, - name="fc", - ) - return fc1001 -
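
A few notes on using the alignment tooling in these patches. The build_watch_cb / build_watch_diff_cb helpers from patch 3 bake their target path in at construction time (output/<iter>/<name>.npy with iter=0), so every training step overwrites the same files, and nothing creates output/0/ before the first np.save fires (dump_outputs only makes the directory later, at metric-print time), so on a clean tree the first watch callback can fail with FileNotFoundError. A minimal variant that creates the directory eagerly, otherwise matching the patched helper:

import os
import numpy as np

def build_watch_cb(name, iter=0, root='output'):
    dump_dir = os.path.join(root, str(iter))
    os.makedirs(dump_dir, exist_ok=True)  # ensure the dump dir exists up front
    path = os.path.join(dump_dir, f'{name}.npy')
    def cb(blob):
        np.save(path, blob)  # blob is the watched tensor, already as numpy
    return cb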
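
Once a run has populated output/<step>/<name>.npy through the watch callbacks and dump_outputs, the dumps can be diffed offline against a reference run that writes the same layout. A minimal sketch, assuming the reference lives in a sibling directory such as eager_output (that name and the tolerances are illustrative, not part of the patches):

import os
import sys
import numpy as np

def compare_step(dump_root, ref_root, step, rtol=1e-4, atol=1e-5):
    step_dir = os.path.join(dump_root, str(step))
    for fname in sorted(os.listdir(step_dir)):
        if not fname.endswith(".npy"):
            continue
        ref_path = os.path.join(ref_root, str(step), fname)
        if not os.path.exists(ref_path):
            print("%s: no reference file, skipped" % fname)
            continue
        a = np.load(os.path.join(step_dir, fname))
        b = np.load(ref_path)
        if a.shape != b.shape:
            print("%s: shape mismatch %s vs %s" % (fname, a.shape, b.shape))
            continue
        status = "OK" if np.allclose(a, b, rtol=rtol, atol=atol) else "MISMATCH"
        print("%s: %s (max abs diff %.3e)" % (fname, status, np.abs(a - b).max()))

if __name__ == "__main__":
    # e.g. python3 compare_dumps.py output eager_output 0
    compare_step(sys.argv[1], sys.argv[2], int(sys.argv[3]))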
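
Patch 6 pins cuDNN's convolution algorithms through the job function config. Left alone, cuDNN autotunes an algorithm per process (and per shape), which is a common source of small run-to-run numerical drift; pinning the algorithms, together with --fuse_bn_relu=False / --fuse_bn_add_relu=False in align.sh, keeps the executed kernels stable while diffing against the eager reference. A sketch of a config built with the same calls (the 0/1/1 algorithm indices are simply the ones the patch picks):

import oneflow.compatible.single_client as flow

config = flow.function_config()
config.enable_fuse_add_to_output(True)
# Force fixed cuDNN conv algorithms instead of per-run autotuning:
config.cudnn_conv_force_fwd_algo(0)
config.cudnn_conv_force_bwd_data_algo(1)
config.cudnn_conv_force_bwd_filter_algo(1)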
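
The point of the renaming in patches 2 and 7 is that variable prefixes now follow torchvision's ResNet-50 layout (layerX.Y.convZ, layerX.Y.downsample.0, fc) instead of the old resN_M_branch2a / fc1001 scheme, which makes per-tensor comparison and weight transfer straightforward. Conv weights map one to one; note that flow.layers.batch_normalization and flow.layers.dense still append their own suffixes (-gamma, -beta, -weight) behind these prefixes, so a checkpoint converter still has to translate suffixes. A quick name-level sanity check against torchvision, assuming it is installed (no weights are copied here):

import torchvision

model = torchvision.models.resnet50()
torch_names = {name for name, _ in model.named_parameters()}
# Conv variable names the renamed graph produces via get_variable(name + ".weight"):
renamed_convs = {
    "conv1.weight",
    "layer1.0.conv1.weight",
    "layer1.0.downsample.0.weight",
    "layer4.2.conv3.weight",
}
print("unmatched:", sorted(renamed_convs - torch_names) or "none")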
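
Since align.sh keeps --label_smoothing=0.1, the loss being compared is softmax cross entropy against smoothed targets. Under the common formulation (assumed here, since util.py's label_smoothing body is not shown in these patches), the target puts 1 - eps on the true class plus eps/K spread uniformly over all K classes, so the expected per-sample loss can be recomputed from a dumped logits row:

import numpy as np

def smoothed_softmax_xent(logits, label, eps=0.1):
    k = logits.shape[-1]
    target = np.full(k, eps / k)       # eps/K on every class...
    target[label] += 1.0 - eps         # ...plus 1 - eps on the true class
    m = logits.max()
    log_p = logits - (m + np.log(np.exp(logits - m).sum()))  # stable log-softmax
    return -(target * log_p).sum()

# toy 3-class example; for a real check, feed one row of output/0/logits.npy
print(smoothed_softmax_xent(np.array([2.0, 0.5, -1.0]), label=0))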