Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
259 views
in Technique[技术] by (71.8m points)

python - The loss did not converge when writing a training loop from scratch with TensorFlow 2.2 and Tensorflow 2.3

If I use fit in training my network. The loss function converges, and the metric (accuracy) is significantly improved.

However, I write the training process from scratch by myself. I found that the loss function not converges, and the metric (accuracy) isn't also improved.

My code is the following (don't mind with network architecture, you can design other model and try).

import numpy as np
import tensorflow as tf
import os
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, Dense, GlobalAveragePooling2D
from tensorflow.keras.regularizers import l2

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.InteractiveSession(config=config)

def read_dataset(num_classes=100):
    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)
    return x_train, y_train, x_test, y_test

def basic_model(input_shape, num_classes):
    inps = Input(shape=input_shape)

    x = Conv2D(32, kernel_size=(7,7), strides=(2,2), padding='same', use_bias=False,
               kernel_initializer='he_normal',kernel_regularizer=l2(5e-4))(inps)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(64, kernel_size=(3, 3), strides=(2, 2), padding='same', use_bias=False,
               kernel_initializer='he_normal',kernel_regularizer=l2(5e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding='same', use_bias=False,
               kernel_initializer='he_normal',kernel_regularizer=l2(5e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = GlobalAveragePooling2D()(x)
    out = Dense(num_classes, activation='softmax')(x)
    return Model(inputs=inps, outputs=out)

def train(input_shape, num_classes=100, num_epochs=20, batch_size=32, lr_init=0.001):
    X_train, y_train, X_test, y_test = read_dataset()
    print('X_train shape', X_train.shape)
    print('y_train shape', y_train.shape)
    print('X_test shape', X_test.shape)
    print('y_test shape', y_test.shape)

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

    test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
    test_dataset = test_dataset.shuffle(buffer_size=1024).batch(batch_size)

    model = basic_model(input_shape, num_classes)
    model.summary(line_length=100)

    sgd = SGD(learning_rate=lr_init, momentum=0.99, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_dataset, epochs=num_epochs, verbose=1, validation_data=test_dataset)

class Custom_Model(Model):
    def __init__(self, model):
        super(Custom_Model, self).__init__()
        self.model = model

    def compile(
        self,
        optimizer,
        metrics,
        loss_fn
    ):
        super(Custom_Model, self).compile(optimizer=optimizer, metrics=metrics)
        self.loss_fn = loss_fn

    def train_step(self, data):
        # Unpack data
        x, y = data

        with tf.GradientTape() as tape:
            # Forward pass of student
            predictions = self.model(x, training=True)

            # Compute losses
            loss = self.loss_fn(y, predictions)
            loss += sum(self.model.losses)

        # Compute gradients
        trainable_vars = self.model.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, predictions)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"loss": loss})
        return results

    def test_step(self, data):
        # Unpack the data
        x, y = data

        # Compute predictions
        y_prediction = self.model(x, training=False)

        # Calculate the loss
        loss = self.loss_fn(y, y_prediction)

        # Update the metrics.
        self.compiled_metrics.update_state(y, y_prediction)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"val_loss": loss})
        return results


def train_loop(input_shape, num_classes=100, num_epochs=20, batch_size=32, lr_init=0.001):
    X_train, y_train, X_test, y_test = read_dataset()
    print('X_train shape', X_train.shape)
    print('y_train shape', y_train.shape)
    print('X_test shape', X_test.shape)
    print('y_test shape', y_test.shape)

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

    test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
    test_dataset = test_dataset.shuffle(buffer_size=1024).batch(batch_size)

    model = basic_model(input_shape, num_classes)
    model.summary(line_length=100)

    sgd = SGD(learning_rate=lr_init, momentum=0.99, nesterov=True)
    custom_model = Custom_Model(model=model)
    custom_model.compile(optimizer=sgd, loss_fn=CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

    custom_model.fit(train_dataset, epochs=num_epochs, verbose=1, validation_data=test_dataset)


def main():
    input_shape = (32,32,3)
    num_classes = 100
    num_epochs = 20
    batch_size = 32
    lr_init = 0.001

    print(tf.__version__)

    print('-----------------Training with fit-----------------------------')
    train(input_shape, num_classes, num_epochs, batch_size, lr_init)

    print('-----------------Training loop from scratch-----------------------------')
    train_loop(input_shape, num_classes, num_epochs, batch_size, lr_init)


if __name__ == '__main__':
    main()

Here is the result when I use model.fit as usual: model.fit

Here is the result when I use the training loop from scratch: training loop from scratch

The graph of the loss value between model.fit and training loop: graph


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)
等待大神答复

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...